@@ -947,4 +947,67 @@ extern "C" void acc_combined(int N, int cond) {
947
947
// CHECK-NEXT: acc.yield
948
948
// CHECK-NEXT: } loc
949
949
950
+ #pragma acc parallel loop async
951
+ for (unsigned I = 0 ; I < N; ++I);
952
+ // CHECK-NEXT: acc.parallel combined(loop) async {
953
+ // CHECK-NEXT: acc.loop combined(parallel) {
954
+ // CHECK: acc.yield
955
+ // CHECK-NEXT: } loc
956
+ // CHECK-NEXT: acc.yield
957
+ // CHECK-NEXT: } loc
958
+
959
+ #pragma acc serial loop async(cond)
960
+ for (unsigned I = 0 ; I < N; ++I);
961
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
962
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
963
+ // CHECK-NEXT: acc.serial combined(loop) async(%[[CONV_CAST]] : si32) {
964
+ // CHECK-NEXT: acc.loop combined(serial) {
965
+ // CHECK: acc.yield
966
+ // CHECK-NEXT: } loc
967
+ // CHECK-NEXT: acc.yield
968
+ // CHECK-NEXT: } loc
969
+
970
+ #pragma acc kernels loop async device_type(nvidia, radeon) async
971
+ for (unsigned I = 0 ; I < N; ++I);
972
+ // CHECK-NEXT: acc.kernels combined(loop) async([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
973
+ // CHECK-NEXT: acc.loop combined(kernels) {
974
+ // CHECK: acc.yield
975
+ // CHECK-NEXT: } loc
976
+ // CHECK-NEXT: acc.terminator
977
+ // CHECK-NEXT: } loc
978
+
979
+ #pragma acc parallel loop async(3) device_type(nvidia, radeon) async(cond)
980
+ for (unsigned I = 0 ; I < N; ++I);
981
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
982
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
983
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
984
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
985
+ // CHECK-NEXT: acc.parallel combined(loop) async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
986
+ // CHECK-NEXT: acc.loop combined(parallel) {
987
+ // CHECK: acc.yield
988
+ // CHECK-NEXT: } loc
989
+ // CHECK-NEXT: acc.yield
990
+ // CHECK-NEXT: } loc
991
+
992
+ #pragma acc serial loop async device_type(nvidia, radeon) async(cond)
993
+ for (unsigned I = 0 ; I < N; ++I);
994
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
995
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
996
+ // CHECK-NEXT: acc.serial combined(loop) async([#acc.device_type<none>], %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
997
+ // CHECK-NEXT: acc.loop combined(serial) {
998
+ // CHECK: acc.yield
999
+ // CHECK-NEXT: } loc
1000
+ // CHECK-NEXT: acc.yield
1001
+ // CHECK-NEXT: } loc
1002
+
1003
+ #pragma acc kernels loop async(3) device_type(nvidia, radeon) async
1004
+ for (unsigned I = 0 ; I < N; ++I);
1005
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
1006
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
1007
+ // CHECK-NEXT: acc.kernels combined(loop) async([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[THREE_CAST]] : si32) {
1008
+ // CHECK-NEXT: acc.loop combined(kernels) {
1009
+ // CHECK: acc.yield
1010
+ // CHECK-NEXT: } loc
1011
+ // CHECK-NEXT: acc.terminator
1012
+ // CHECK-NEXT: } loc
950
1013
}
0 commit comments