diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index 5438e6cc7dcb7..bf1d1b0155a58 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -336,6 +336,67 @@ genOMP(Fortran::lower::AbstractConverter &converter,
   }
 }
 
+static mlir::omp::ScheduleModifier
+translateModifier(const Fortran::parser::OmpScheduleModifierType &m) {
+  switch (m.v) {
+  case Fortran::parser::OmpScheduleModifierType::ModType::Monotonic:
+    return mlir::omp::ScheduleModifier::monotonic;
+  case Fortran::parser::OmpScheduleModifierType::ModType::Nonmonotonic:
+    return mlir::omp::ScheduleModifier::nonmonotonic;
+  case Fortran::parser::OmpScheduleModifierType::ModType::Simd:
+    return mlir::omp::ScheduleModifier::simd;
+  }
+  return mlir::omp::ScheduleModifier::none;
+}
+
+static mlir::omp::ScheduleModifier
+getScheduleModifiers(const Fortran::parser::OmpScheduleClause &x) {
+  const auto &modifier =
+      std::get<std::optional<Fortran::parser::OmpScheduleModifier>>(x.t);
+  // The two modifiers may appear in either order in the input, so look for
+  // the one that is not SIMD. If no modifier is present at all, fall through
+  // and return "none".
+  if (modifier) {
+    const auto &modType1 =
+        std::get<Fortran::parser::OmpScheduleModifier::Modifier1>(modifier->t);
+    if (modType1.v.v ==
+        Fortran::parser::OmpScheduleModifierType::ModType::Simd) {
+      const auto &modType2 = std::get<
+          std::optional<Fortran::parser::OmpScheduleModifier::Modifier2>>(
+          modifier->t);
+      if (modType2 &&
+          modType2->v.v !=
+              Fortran::parser::OmpScheduleModifierType::ModType::Simd)
+        return translateModifier(modType2->v);
+    }
+
+    return translateModifier(modType1.v);
+  }
+  return mlir::omp::ScheduleModifier::none;
+}
+
+static mlir::omp::ScheduleModifier
+getSIMDModifier(const Fortran::parser::OmpScheduleClause &x) {
+  const auto &modifier =
+      std::get<std::optional<Fortran::parser::OmpScheduleModifier>>(x.t);
+  // Either of the two possible modifiers can be the SIMD modifier, so check
+  // both and return "simd" if one of them matches; otherwise return "none".
+  if (modifier) {
+    const auto &modType1 =
+        std::get<Fortran::parser::OmpScheduleModifier::Modifier1>(modifier->t);
+    if (modType1.v.v == Fortran::parser::OmpScheduleModifierType::ModType::Simd)
+      return mlir::omp::ScheduleModifier::simd;
+
+    const auto &modType2 = std::get<
+        std::optional<Fortran::parser::OmpScheduleModifier::Modifier2>>(
+        modifier->t);
+    if (modType2 &&
+        modType2->v.v == Fortran::parser::OmpScheduleModifierType::ModType::Simd)
+      return mlir::omp::ScheduleModifier::simd;
+  }
+  return mlir::omp::ScheduleModifier::none;
+}
+
 static void genOMP(Fortran::lower::AbstractConverter &converter,
                    Fortran::lower::pft::Evaluation &eval,
                    const Fortran::parser::OpenMPLoopConstruct &loopConstruct) {
@@ -479,6 +540,11 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
             omp::ClauseScheduleKind::Runtime)));
         break;
       }
+      wsLoopOp.schedule_modifiersAttr(
+          firOpBuilder.getStringAttr(omp::stringifyScheduleModifier(
+              getScheduleModifiers(scheduleClause->v))));
+      wsLoopOp.simd_modifierAttr(firOpBuilder.getStringAttr(
+          omp::stringifyScheduleModifier(getSIMDModifier(scheduleClause->v))));
     }
   }
   // In FORTRAN the `nowait` clause occurs at the end of the `omp do` directive.
diff --git a/flang/test/Lower/OpenMP/omp-wsloop-dynamic.f90 b/flang/test/Lower/OpenMP/omp-wsloop-dynamic.f90
new file mode 100644
index 0000000000000..5ede05abe7235
--- /dev/null
+++ b/flang/test/Lower/OpenMP/omp-wsloop-dynamic.f90
@@ -0,0 +1,102 @@
+! This test checks lowering of the OpenMP DO Directive (Worksharing) with a dynamic schedule.
+
+! RUN: bbc -fopenmp -emit-fir %s -o - | \
+! RUN: FileCheck %s --check-prefix=FIRDialect
+! RUN: bbc -fopenmp -emit-fir %s -o - | \
+! RUN: tco --disable-llvm --print-ir-after=fir-to-llvm-ir 2>&1 | \
+! RUN: FileCheck %s --check-prefix=LLVMIRDialect
+! RUN: bbc -fopenmp -emit-fir %s -o - | \
+! 
RUN: tco | FileCheck %s --check-prefix=LLVMIR + +program wsloop_dynamic + integer :: i +!FIRDialect: func @_QQmain() +!LLVMIRDialect: func @_QQmain() + +!LLVMIR: define void @_QQmain() +!LLVMIR:call i32 @__kmpc_global_thread_num{{.*}} +!LLVMIR: br label %omp_parallel + +!$OMP PARALLEL +!FIRDialect-LABLEL: omp.parallel { +!LLVMIRDialect-LABLEL: omp.parallel { + +!LLVMIR: omp_parallel: ; preds = %0 +!LLVMIR: @__kmpc_fork_call +!$OMP DO SCHEDULE(dynamic) +!FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 +!FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 +!FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(dynamic, none) nowait inclusive + +!LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 +!LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(dynamic, none) nowait inclusive + +!LLVMIR: define internal void @_QQmain..omp_par +!LLVMIR: omp.par.entry: +!LLVMIR: br label %omp.par.region +!LLVMIR: omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +!LLVMIR: ret void +!LLVMIR: omp.par.region: ; preds = %omp.par.entry +!LLVMIR: br label %omp.par.region1 +!LLVMIR: omp.par.region1: ; preds = %omp.par.region +!LLVMIR: br label %omp_loop.preheader +!LLVMIR: omp_loop.preheader: ; preds = %omp.par.region1 +!LLVMIR: @__kmpc_global_thread_num +!LLVMIR: @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %omp_global_thread_num{{.*}}, i32 35, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) +!LLVMIR: br label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.preheader.outer.cond: +!LLVMIR: @__kmpc_dispatch_next_4u +!LLVMIR: %{{.*}} = icmp ne i32 %{{.*}}, 0 +!LLVMIR: %{{.*}} = load i32, i32* %p.lowerbound, align 4 +!LLVMIR: %{{.*}} = sub i32 %{{.*}}, 1 +!LLVMIR: br i1 %{{.*}}, label %omp_loop.header, label %omp_loop.exit +!LLVMIR: omp_loop.exit: ; preds = %omp_loop.preheader.outer.cond +!LLVMIR: br label %omp_loop.after +!LLVMIR: omp_loop.header: ; preds = %omp_loop.preheader.outer.cond, %omp_loop.inc +!LLVMIR: %omp_loop.iv = phi i32 [ %lb, %omp_loop.preheader.outer.cond ], [ %omp_loop.next, %omp_loop.inc ] + +do i=1, 9 +print*, i +!FIRDialect: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!FIRDialect: %[[CONVERTED:.*]] = fir.convert %[[I]] : (i32) -> i64 +!FIRDialect: fir.call @_FortranAioOutputInteger64(%[[RTBEGIN]], %[[CONVERTED]]) : (!fir.ref, i64) -> i1 +!FIRDialect: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) : (!fir.ref) -> i32 + + +!LLVMIRDialect: llvm.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) : (i32, !llvm.ptr, i32) -> !llvm.ptr +!LLVMIRDialect: %{{.*}} = llvm.sext %arg0 : i32 to i64 +!LLVMIRDialect: llvm.call @_FortranAioOutputInteger64(%{{.*}}, %{{.*}}) : (!llvm.ptr, i64) -> i1 +!LLVMIRDialect: llvm.call @_FortranAioEndIoStatement(%{{.*}}) : (!llvm.ptr) -> i32 + +!LLVMIR: br label %omp_loop.cond +!LLVMIR: omp_loop.cond: ; preds = %omp_loop.header +!LLVMIR %{{.*}} = load i32, i32* %{{.*}}, aling {{.*}} +!LLVMIR: %omp_loop.cmp = icmp ult i32 %{{.*}}, %{{.*}} +!LLVMIR: br i1 %omp_loop.cmp, label %omp_loop.body, label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.body: ; preds = %omp_loop.cond +!LLVMIR: %{{.*}} = mul i32 %{{.*}}, 1 +!LLVMIR: %{{.*}} = add i32 %{{.*}}, 1 +!LLVMIR: br label %omp.wsloop.region +!LLVMIR: omp.wsloop.region: ; preds = %omp_loop.body +!LLVMIR: %{{.*}} = call i8* 
@_FortranAioBeginExternalListOutput +!LLVMIR: %{{.*}} = sext i32 %{{.*}} to i64 +!LLVMIR: %{{.*}} = call i1 @_FortranAioOutputInteger64 +!LLVMIR: %{{.*}} = call i32 @_FortranAioEndIoStatement + +end do +!FIRDialect: omp.yield +!FIRDialect: } +!FIRDialect: omp.terminator +!FIRDialect: } + +!LLVMIRDialect: omp.yield +!LLVMIRDialect: } +!LLVMIRDialect: omp.terminator +!LLVMIRDialect: } +!LLVMIRDialect: llvm.return +!LLVMIRDialect: } +!$OMP END DO NOWAIT +!$OMP END PARALLEL +end diff --git a/flang/test/Lower/OpenMP/omp-wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/omp-wsloop-monotonic.f90 new file mode 100644 index 0000000000000..6e8c82bf05be8 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-monotonic.f90 @@ -0,0 +1,100 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: FileCheck %s --check-prefix=FIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco --disable-llvm --print-ir-after=fir-to-llvm-ir 2>&1 | \ +! RUN: FileCheck %s --check-prefix=LLVMIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco | FileCheck %s --check-prefix=LLVMIR + +program wsloop_dynamic + integer :: i +!FIRDialect: func @_QQmain() +!LLVMIRDialect: func @_QQmain() + +!LLVMIR: define void @_QQmain() +!LLVMIR:call i32 @__kmpc_global_thread_num{{.*}} +!LLVMIR: br label %omp_parallel + +!$OMP PARALLEL +!FIRDialect-LABLEL: omp.parallel { +!LLVMIRDialect-LABLEL: omp.parallel { + +!LLVMIR: omp_parallel: ; preds = %0 +!LLVMIR: @__kmpc_fork_call +!$OMP DO SCHEDULE(monotonic:dynamic) +!FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 +!FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 +!FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(dynamic, monotonic) nowait inclusive + +!LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 +!LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(dynamic, monotonic) nowait inclusive + +!LLVMIR: define internal void @_QQmain..omp_par +!LLVMIR: omp.par.entry: +!LLVMIR: br label %omp.par.region +!LLVMIR: omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +!LLVMIR: ret void +!LLVMIR: omp.par.region: ; preds = %omp.par.entry +!LLVMIR: br label %omp.par.region1 +!LLVMIR: omp.par.region1: ; preds = %omp.par.region +!LLVMIR: br label %omp_loop.preheader +!LLVMIR: omp_loop.preheader: ; preds = %omp.par.region1 +!LLVMIR: @__kmpc_global_thread_num +!LLVMIR: @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %omp_global_thread_num{{.*}}, i32 536870947, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) +!LLVMIR: br label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.preheader.outer.cond: +!LLVMIR: @__kmpc_dispatch_next_4u +!LLVMIR: %{{.*}} = icmp ne i32 %{{.*}}, 0 +!LLVMIR: %{{.*}} = load i32, i32* %p.lowerbound, align 4 +!LLVMIR: %{{.*}} = sub i32 %{{.*}}, 1 +!LLVMIR: br i1 %{{.*}}, label %omp_loop.header, label %omp_loop.exit +!LLVMIR: omp_loop.exit: ; preds = %omp_loop.preheader.outer.cond +!LLVMIR: br label %omp_loop.after +!LLVMIR: omp_loop.header: ; preds = %omp_loop.preheader.outer.cond, %omp_loop.inc +!LLVMIR: %omp_loop.iv = phi i32 [ %lb, %omp_loop.preheader.outer.cond ], [ %omp_loop.next, %omp_loop.inc ] + +do i=1, 9 +print*, i +!FIRDialect: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!FIRDialect: %[[CONVERTED:.*]] = fir.convert 
%[[I]] : (i32) -> i64 +!FIRDialect: fir.call @_FortranAioOutputInteger64(%[[RTBEGIN]], %[[CONVERTED]]) : (!fir.ref, i64) -> i1 +!FIRDialect: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) : (!fir.ref) -> i32 + + +!LLVMIRDialect: llvm.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) : (i32, !llvm.ptr, i32) -> !llvm.ptr +!LLVMIRDialect: %{{.*}} = llvm.sext %[[I]] : i32 to i64 +!LLVMIRDialect: llvm.call @_FortranAioOutputInteger64(%{{.*}}, %{{.*}}) : (!llvm.ptr, i64) -> i1 +!LLVMIRDialect: llvm.call @_FortranAioEndIoStatement(%{{.*}}) : (!llvm.ptr) -> i32 + +!LLVMIR: br label %omp_loop.cond +!LLVMIR: omp_loop.cond: ; preds = %omp_loop.header +!LLVMIR %{{.*}} = load i32, i32* %{{.*}}, aling {{.*}} +!LLVMIR: %omp_loop.cmp = icmp ult i32 %{{.*}}, %{{.*}} +!LLVMIR: br i1 %omp_loop.cmp, label %omp_loop.body, label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.body: ; preds = %omp_loop.cond +!LLVMIR: %{{.*}} = mul i32 %{{.*}}, 1 +!LLVMIR: %{{.*}} = add i32 %{{.*}}, 1 +!LLVMIR: br label %omp.wsloop.region +!LLVMIR: omp.wsloop.region: ; preds = %omp_loop.body +!LLVMIR: %{{.*}} = call i8* @_FortranAioBeginExternalListOutput +!LLVMIR: %{{.*}} = sext i32 %{{.*}} to i64 +!LLVMIR: %{{.*}} = call i1 @_FortranAioOutputInteger64 +!LLVMIR: %{{.*}} = call i32 @_FortranAioEndIoStatement + +end do +!FIRDialect: omp.yield +!FIRDialect: omp.terminator +!FIRDialect: } + +!LLVMIRDialect: omp.yield +!LLVMIRDialect: omp.terminator +!LLVMIRDialect: } +!LLVMIRDialect: llvm.return +!LLVMIRDialect: } +!$OMP END DO NOWAIT +!$OMP END PARALLEL +end diff --git a/flang/test/Lower/OpenMP/omp-wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/omp-wsloop-nonmonotonic.f90 new file mode 100644 index 0000000000000..8df278fcbd665 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-nonmonotonic.f90 @@ -0,0 +1,102 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: FileCheck %s --check-prefix=FIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco --disable-llvm --print-ir-after=fir-to-llvm-ir 2>&1 | \ +! RUN: FileCheck %s --check-prefix=LLVMIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! 
RUN: tco | FileCheck %s --check-prefix=LLVMIR + +program wsloop_dynamic + integer :: i +!FIRDialect: func @_QQmain() +!LLVMIRDialect: func @_QQmain() + +!LLVMIR: define void @_QQmain() +!LLVMIR:call i32 @__kmpc_global_thread_num{{.*}} +!LLVMIR: br label %omp_parallel + +!$OMP PARALLEL +!FIRDialect-LABLEL: omp.parallel { +!LLVMIRDialect-LABLEL: omp.parallel { + +!LLVMIR: omp_parallel: ; preds = %0 +!LLVMIR: @__kmpc_fork_call +!$OMP DO SCHEDULE(nonmonotonic:dynamic) +!FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 +!FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 +!FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(dynamic, nonmonotonic) nowait inclusive + +!LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 +!LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(dynamic, nonmonotonic) nowait inclusive + +!LLVMIR: define internal void @_QQmain..omp_par +!LLVMIR: omp.par.entry: +!LLVMIR: br label %omp.par.region +!LLVMIR: omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +!LLVMIR: ret void +!LLVMIR: omp.par.region: ; preds = %omp.par.entry +!LLVMIR: br label %omp.par.region1 +!LLVMIR: omp.par.region1: ; preds = %omp.par.region +!LLVMIR: br label %omp_loop.preheader +!LLVMIR: omp_loop.preheader: ; preds = %omp.par.region1 +!LLVMIR: @__kmpc_global_thread_num +!LLVMIR: @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %omp_global_thread_num{{.*}}, i32 1073741859, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) +!LLVMIR: br label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.preheader.outer.cond: +!LLVMIR: @__kmpc_dispatch_next_4u +!LLVMIR: %{{.*}} = icmp ne i32 %{{.*}}, 0 +!LLVMIR: %{{.*}} = load i32, i32* %p.lowerbound, align 4 +!LLVMIR: %{{.*}} = sub i32 %{{.*}}, 1 +!LLVMIR: br i1 %{{.*}}, label %omp_loop.header, label %omp_loop.exit +!LLVMIR: omp_loop.exit: ; preds = %omp_loop.preheader.outer.cond +!LLVMIR: br label %omp_loop.after +!LLVMIR: omp_loop.header: ; preds = %omp_loop.preheader.outer.cond, %omp_loop.inc +!LLVMIR: %omp_loop.iv = phi i32 [ %lb, %omp_loop.preheader.outer.cond ], [ %omp_loop.next, %omp_loop.inc ] + +do i=1, 9 +print*, i +!FIRDialect: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!FIRDialect: %[[CONVERTED:.*]] = fir.convert %[[I]] : (i32) -> i64 +!FIRDialect: fir.call @_FortranAioOutputInteger64(%[[RTBEGIN]], %[[CONVERTED]]) : (!fir.ref, i64) -> i1 +!FIRDialect: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) : (!fir.ref) -> i32 + + +!LLVMIRDialect: llvm.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) : (i32, !llvm.ptr, i32) -> !llvm.ptr +!LLVMIRDialect: %{{.*}} = llvm.sext %arg0 : i32 to i64 +!LLVMIRDialect: llvm.call @_FortranAioOutputInteger64(%{{.*}}, %{{.*}}) : (!llvm.ptr, i64) -> i1 +!LLVMIRDialect: llvm.call @_FortranAioEndIoStatement(%{{.*}}) : (!llvm.ptr) -> i32 + +!LLVMIR: br label %omp_loop.cond +!LLVMIR: omp_loop.cond: ; preds = %omp_loop.header +!LLVMIR %{{.*}} = load i32, i32* %{{.*}}, aling {{.*}} +!LLVMIR: %omp_loop.cmp = icmp ult i32 %{{.*}}, %{{.*}} +!LLVMIR: br i1 %omp_loop.cmp, label %omp_loop.body, label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.body: ; preds = %omp_loop.cond +!LLVMIR: %{{.*}} = mul i32 %{{.*}}, 1 +!LLVMIR: %{{.*}} = add i32 %{{.*}}, 1 +!LLVMIR: br label %omp.wsloop.region +!LLVMIR: omp.wsloop.region: ; preds = %omp_loop.body 
+!LLVMIR: %{{.*}} = call i8* @_FortranAioBeginExternalListOutput +!LLVMIR: %{{.*}} = sext i32 %{{.*}} to i64 +!LLVMIR: %{{.*}} = call i1 @_FortranAioOutputInteger64 +!LLVMIR: %{{.*}} = call i32 @_FortranAioEndIoStatement + +end do +!FIRDialect: omp.yield +!FIRDialect: } +!FIRDialect: omp.terminator +!FIRDialect: } + +!LLVMIRDialect: omp.yield +!LLVMIRDialect: } +!LLVMIRDialect: omp.terminator +!LLVMIRDialect: } +!LLVMIRDialect: llvm.return +!LLVMIRDialect: } +!$OMP END DO NOWAIT +!$OMP END PARALLEL +end diff --git a/flang/test/Lower/OpenMP/omp-wsloop-simd.f90 b/flang/test/Lower/OpenMP/omp-wsloop-simd.f90 new file mode 100644 index 0000000000000..fde0e29b2114e --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-simd.f90 @@ -0,0 +1,102 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: FileCheck %s --check-prefix=FIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco --disable-llvm --print-ir-after=fir-to-llvm-ir 2>&1 | \ +! RUN: FileCheck %s --check-prefix=LLVMIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco | FileCheck %s --check-prefix=LLVMIR + +program wsloop_dynamic + integer :: i +!FIRDialect: func @_QQmain() +!LLVMIRDialect: func @_QQmain() + +!LLVMIR: define void @_QQmain() +!LLVMIR:call i32 @__kmpc_global_thread_num{{.*}} +!LLVMIR: br label %omp_parallel + +!$OMP PARALLEL +!FIRDialect-LABLEL: omp.parallel { +!LLVMIRDialect-LABLEL: omp.parallel { + +!LLVMIR: omp_parallel: ; preds = %0 +!LLVMIR: @__kmpc_fork_call +!$OMP DO SCHEDULE(simd: runtime) +!FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 +!FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 +!FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(runtime, none, simd) nowait inclusive + +!LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 +!LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(runtime, none, simd) nowait inclusive + +!LLVMIR: define internal void @_QQmain..omp_par +!LLVMIR: omp.par.entry: +!LLVMIR: br label %omp.par.region +!LLVMIR: omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +!LLVMIR: ret void +!LLVMIR: omp.par.region: ; preds = %omp.par.entry +!LLVMIR: br label %omp.par.region1 +!LLVMIR: omp.par.region1: ; preds = %omp.par.region +!LLVMIR: br label %omp_loop.preheader +!LLVMIR: omp_loop.preheader: ; preds = %omp.par.region1 +!LLVMIR: @__kmpc_global_thread_num +!LLVMIR: @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %omp_global_thread_num{{.*}}, i32 47, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) +!LLVMIR: br label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.preheader.outer.cond: +!LLVMIR: @__kmpc_dispatch_next_4u +!LLVMIR: %{{.*}} = icmp ne i32 %{{.*}}, 0 +!LLVMIR: %{{.*}} = load i32, i32* %p.lowerbound, align 4 +!LLVMIR: %{{.*}} = sub i32 %{{.*}}, 1 +!LLVMIR: br i1 %{{.*}}, label %omp_loop.header, label %omp_loop.exit +!LLVMIR: omp_loop.exit: ; preds = %omp_loop.preheader.outer.cond +!LLVMIR: br label %omp_loop.after +!LLVMIR: omp_loop.header: ; preds = %omp_loop.preheader.outer.cond, %omp_loop.inc +!LLVMIR: %omp_loop.iv = phi i32 [ %lb, %omp_loop.preheader.outer.cond ], [ %omp_loop.next, %omp_loop.inc ] + +do i=1, 9 +print*, i +!FIRDialect: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!FIRDialect: %[[CONVERTED:.*]] = 
fir.convert %[[I]] : (i32) -> i64 +!FIRDialect: fir.call @_FortranAioOutputInteger64(%[[RTBEGIN]], %[[CONVERTED]]) : (!fir.ref, i64) -> i1 +!FIRDialect: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) : (!fir.ref) -> i32 + + +!LLVMIRDialect: llvm.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) : (i32, !llvm.ptr, i32) -> !llvm.ptr +!LLVMIRDialect: %{{.*}} = llvm.sext %arg0 : i32 to i64 +!LLVMIRDialect: llvm.call @_FortranAioOutputInteger64(%{{.*}}, %{{.*}}) : (!llvm.ptr, i64) -> i1 +!LLVMIRDialect: llvm.call @_FortranAioEndIoStatement(%{{.*}}) : (!llvm.ptr) -> i32 + +!LLVMIR: br label %omp_loop.cond +!LLVMIR: omp_loop.cond: ; preds = %omp_loop.header +!LLVMIR %{{.*}} = load i32, i32* %{{.*}}, aling {{.*}} +!LLVMIR: %omp_loop.cmp = icmp ult i32 %{{.*}}, %{{.*}} +!LLVMIR: br i1 %omp_loop.cmp, label %omp_loop.body, label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.body: ; preds = %omp_loop.cond +!LLVMIR: %{{.*}} = mul i32 %{{.*}}, 1 +!LLVMIR: %{{.*}} = add i32 %{{.*}}, 1 +!LLVMIR: br label %omp.wsloop.region +!LLVMIR: omp.wsloop.region: ; preds = %omp_loop.body +!LLVMIR: %{{.*}} = call i8* @_FortranAioBeginExternalListOutput +!LLVMIR: %{{.*}} = sext i32 %{{.*}} to i64 +!LLVMIR: %{{.*}} = call i1 @_FortranAioOutputInteger64 +!LLVMIR: %{{.*}} = call i32 @_FortranAioEndIoStatement + +end do +!FIRDialect: omp.yield +!FIRDialect: } +!FIRDialect: omp.terminator +!FIRDialect: } + +!LLVMIRDialect: omp.yield +!LLVMIRDialect: } +!LLVMIRDialect: omp.terminator +!LLVMIRDialect: } +!LLVMIRDialect: llvm.return +!LLVMIRDialect: } +!$OMP END DO NOWAIT +!$OMP END PARALLEL +end diff --git a/flang/test/Lower/OpenMP/omp-wsloop.f90 b/flang/test/Lower/OpenMP/omp-wsloop.f90 index 640d657e9010d..216f6259a7f5f 100644 --- a/flang/test/Lower/OpenMP/omp-wsloop.f90 +++ b/flang/test/Lower/OpenMP/omp-wsloop.f90 @@ -27,11 +27,11 @@ program wsloop !FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 !FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 !FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 -!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(static) nowait inclusive +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(static, none) nowait inclusive !LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 !LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 -!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(static) nowait inclusive +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(static, none) nowait inclusive !LLVMIR: define internal void @_QQmain..omp_par !LLVMIR: omp.par.entry: @@ -81,10 +81,12 @@ program wsloop end do !FIRDialect: omp.yield +!FIRDialect: } !FIRDialect: omp.terminator !FIRDialect: } !LLVMIRDialect: omp.yield +!LLVMIRDialect: } !LLVMIRDialect: omp.terminator !LLVMIRDialect: } !LLVMIRDialect: llvm.return diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index af3f249582de4..1553446429e58 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -117,10 +117,16 @@ enum class OMPScheduleType { Runtime = 37, Auto = 38, // auto - ModifierNonmonotonic = - (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ + StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd) + GuidedSimd = 46, 
// guided with chunk adjustment
+  RuntimeSimd = 47,           // runtime with chunk adjustment
 
-  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierNonmonotonic)
+  ModifierMonotonic =
+      (1 << 29), // Set if the monotonic schedule modifier was present
+  ModifierNonmonotonic =
+      (1 << 30), // Set if the nonmonotonic schedule modifier was present
+  ModifierMask = ModifierMonotonic | ModifierNonmonotonic,
+
+  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask)
 };
 
 } // end namespace omp
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 74f91e5862e54..43c6dd9ab9972 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1431,10 +1431,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
 
   Value *ThreadNum = getOrCreateThreadID(SrcLoc);
 
-  OMPScheduleType DynamicSchedType =
-      SchedType | OMPScheduleType::ModifierNonmonotonic;
   Constant *SchedulingType =
-      ConstantInt::get(I32Type, static_cast<int>(DynamicSchedType));
+      ConstantInt::get(I32Type, static_cast<int>(SchedType));
 
   // Call the "init" function.
   Builder.CreateCall(DynamicInit,
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 962dcc235983d..c9e4bed4154be 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1721,7 +1721,7 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   omp::OMPScheduleType SchedType = GetParam();
   uint32_t ChunkSize = 1;
-  switch (SchedType) {
+  switch (SchedType & ~omp::OMPScheduleType::ModifierMask) {
   case omp::OMPScheduleType::DynamicChunked:
   case omp::OMPScheduleType::GuidedChunked:
     ChunkSize = 7;
@@ -1794,8 +1794,9 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   EXPECT_EQ(InitCall->getCalledFunction()->getName(),
             "__kmpc_dispatch_init_4u");
   EXPECT_EQ(InitCall->getNumArgOperands(), 7U);
-  EXPECT_EQ(InitCall->getArgOperand(6),
-            ConstantInt::get(Type::getInt32Ty(Ctx), ChunkSize));
+  EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize));
+  ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
+  EXPECT_EQ(SchedVal->getValue(), static_cast<uint64_t>(SchedType));
 
   ConstantInt *OrigLowerBound =
       dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand());
@@ -1827,12 +1828,23 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   EXPECT_FALSE(verifyModule(*M, &errs()));
 }
 
-INSTANTIATE_TEST_SUITE_P(OpenMPWSLoopSchedulingTypes,
-                         OpenMPIRBuilderTestWithParams,
-                         ::testing::Values(omp::OMPScheduleType::DynamicChunked,
-                                           omp::OMPScheduleType::GuidedChunked,
-                                           omp::OMPScheduleType::Auto,
-                                           omp::OMPScheduleType::Runtime));
+INSTANTIATE_TEST_SUITE_P(
+    OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams,
+    ::testing::Values(omp::OMPScheduleType::DynamicChunked,
+                      omp::OMPScheduleType::GuidedChunked,
+                      omp::OMPScheduleType::Auto, omp::OMPScheduleType::Runtime,
+                      omp::OMPScheduleType::DynamicChunked |
+                          omp::OMPScheduleType::ModifierMonotonic,
+                      omp::OMPScheduleType::DynamicChunked |
+                          omp::OMPScheduleType::ModifierNonmonotonic,
+                      omp::OMPScheduleType::GuidedChunked |
+                          omp::OMPScheduleType::ModifierMonotonic,
+                      omp::OMPScheduleType::GuidedChunked |
+                          omp::OMPScheduleType::ModifierNonmonotonic,
+                      omp::OMPScheduleType::Auto |
+                          omp::OMPScheduleType::ModifierMonotonic,
+                      omp::OMPScheduleType::Runtime |
+                          omp::OMPScheduleType::ModifierMonotonic));
 
 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
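
For reference, the dispatch schedule values exercised above combine a base schedule kind with the new modifier bits. The following standalone sketch is not part of the patch; the constants mirror the OMPScheduleType values in OMPConstants.h above, while the variable names are illustrative stand-ins. It reproduces the literals the new FileCheck lines expect in the __kmpc_dispatch_init_* calls and the masking done in the unit test.

#include <cstdint>
#include <cstdio>

// Illustrative stand-ins mirroring the OMPScheduleType values touched above.
constexpr uint64_t DynamicChunked = 35;          // schedule(dynamic)
constexpr uint64_t RuntimeSimd = 47;             // schedule(simd: runtime)
constexpr uint64_t ModifierMonotonic = 1u << 29;
constexpr uint64_t ModifierNonmonotonic = 1u << 30;
constexpr uint64_t ModifierMask = ModifierMonotonic | ModifierNonmonotonic;

int main() {
  // Combined values passed to __kmpc_dispatch_init_* and checked by the tests.
  uint64_t monotonicDynamic = DynamicChunked | ModifierMonotonic;       // 536870947
  uint64_t nonmonotonicDynamic = DynamicChunked | ModifierNonmonotonic; // 1073741859

  // The unit test keys its chunk-size expectations on the base kind, so it
  // strips the modifier bits first, as in
  // `SchedType & ~omp::OMPScheduleType::ModifierMask` above.
  uint64_t baseKind = monotonicDynamic & ~ModifierMask;                 // 35

  std::printf("%llu %llu %llu %llu\n",
              (unsigned long long)monotonicDynamic,
              (unsigned long long)nonmonotonicDynamic,
              (unsigned long long)baseKind,
              (unsigned long long)RuntimeSimd);
  return 0;
}
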
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index be1e9915484ba..65abdfee63088 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -114,6 +114,20 @@ def TerminatorOp : OpenMP_Op<"terminator", [Terminator]> {
   let assemblyFormat = "attr-dict";
 }
 
+def OMP_SCHEDULE_MOD_None : StrEnumAttrCase<"none", 0>;
+def OMP_SCHEDULE_MOD_Monotonic : StrEnumAttrCase<"monotonic", 1>;
+def OMP_SCHEDULE_MOD_Nonmonotonic : StrEnumAttrCase<"nonmonotonic", 2>;
+def OMP_SCHEDULE_MOD_SIMD : StrEnumAttrCase<"simd", 3>;
+
+def ScheduleModifier : StrEnumAttr<"ScheduleModifier", "OpenMP Schedule Modifier",
+                                   [OMP_SCHEDULE_MOD_None,
+                                    OMP_SCHEDULE_MOD_Monotonic,
+                                    OMP_SCHEDULE_MOD_Nonmonotonic,
+                                    OMP_SCHEDULE_MOD_SIMD]>
+{
+  let cppNamespace = "::mlir::omp";
+}
+
 //===----------------------------------------------------------------------===//
 // 2.9.2 Workshare Loop Construct
 //===----------------------------------------------------------------------===//
@@ -178,6 +192,8 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
              Variadic<AnyType>:$linear_step_vars,
              OptionalAttr<ScheduleKind>:$schedule_val,
              Optional<AnyType>:$schedule_chunk_var,
+             OptionalAttr<ScheduleModifier>:$schedule_modifiers,
+             OptionalAttr<ScheduleModifier>:$simd_modifier,
              Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$collapse_val,
              UnitAttr:$nowait,
              Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 3f689ebc0e023..3d74ddd1790cd 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -413,6 +413,7 @@ parseLinearClause(OpAsmParser &parser,
 ///   sched-wo-chunk ::= `auto` | `runtime`
 static ParseResult
 parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule,
+                    SmallVectorImpl<SmallString<12>> &modifiers,
                     Optional<OpAsmParser::OperandType> &chunkSize) {
   if (parser.parseLParen())
     return failure();
@@ -436,6 +437,14 @@ parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule,
     return parser.emitError(parser.getNameLoc()) << " expected schedule kind";
   }
 
+  // If there is a comma, one or more schedule modifiers follow.
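+  // For example, the forms exercised by the tests in this patch are:
+  //   schedule(dynamic, nonmonotonic)
+  //   schedule(static = %chunk_var, none)
+  //   schedule(runtime, none, simd)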
+  while (succeeded(parser.parseOptionalComma())) {
+    StringRef mod;
+    if (parser.parseKeyword(&mod))
+      return failure();
+    modifiers.push_back(mod);
+  }
+
   if (parser.parseRParen())
     return failure();
 
@@ -507,6 +516,7 @@ static ParseResult parseWsLoopOp(OpAsmParser &parser, OperationState &result) {
   SmallVector<Type> linearTypes;
   SmallVector<OpAsmParser::OperandType> linearSteps;
   SmallString<8> schedule;
+  SmallVector<SmallString<12>> modifiers;
   Optional<OpAsmParser::OperandType> scheduleChunkSize;
   std::array<int, 9> segments{numIVs, numIVs, numIVs, 0, 0, 0, 0, 0, 0};
 
@@ -557,7 +567,7 @@ static ParseResult parseWsLoopOp(OpAsmParser &parser, OperationState &result) {
     } else if (keyword == "schedule") {
       if (!schedule.empty())
         return allowedOnce(parser, "schedule", opName);
-      if (parseScheduleClause(parser, schedule, scheduleChunkSize))
+      if (parseScheduleClause(parser, schedule, modifiers, scheduleChunkSize))
         return failure();
       if (scheduleChunkSize) {
         segments[scheduleClausePos] = 1;
@@ -626,6 +636,14 @@ static ParseResult parseWsLoopOp(OpAsmParser &parser, OperationState &result) {
     schedule[0] = llvm::toUpper(schedule[0]);
     auto attr = parser.getBuilder().getStringAttr(schedule);
     result.addAttribute("schedule_val", attr);
+    if (modifiers.size() > 0) {
+      auto mod = parser.getBuilder().getStringAttr(modifiers[0]);
+      result.addAttribute("schedule_modifiers", mod);
+      if (modifiers.size() > 1) {
+        mod = parser.getBuilder().getStringAttr(modifiers[1]);
+        result.addAttribute("simd_modifier", mod);
+      }
+    }
     if (scheduleChunkSize) {
       auto chunkSizeType = parser.getBuilder().getI32Type();
       parser.resolveOperand(*scheduleChunkSize, chunkSizeType, result.operands);
@@ -684,6 +702,13 @@ static void printWsLoopOp(OpAsmPrinter &p, WsLoopOp op) {
     if (auto chunk = op.schedule_chunk_var()) {
       p << " = " << chunk;
     }
+    if (auto modifier = op.schedule_modifiers()) {
+      p << ", " << modifier;
+    }
+    auto simd = op.simd_modifier();
+    if (simd.hasValue() && *simd != "none") {
+      p << ", " << simd;
+    }
     p << ")";
   }
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6259612d5112b..cf864cb046101 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -275,6 +275,13 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
       findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::InsertPointTy afterIP;
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+  bool isSimd = false;
+  if (auto simd = loop.simd_modifier()) {
+    omp::ScheduleModifier modifier = *omp::symbolizeScheduleModifier(*simd);
+    isSimd = (modifier == omp::ScheduleModifier::simd);
+  }
+
   if (schedule == omp::ClauseScheduleKind::Static) {
     loopInfo = ompBuilder->createStaticWorkshareLoop(ompLoc, loopInfo, allocaIP,
                                                      !loop.nowait(), chunk);
@@ -286,19 +293,41 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
       schedType = llvm::omp::OMPScheduleType::DynamicChunked;
       break;
     case omp::ClauseScheduleKind::Guided:
-      schedType = llvm::omp::OMPScheduleType::GuidedChunked;
+      if (isSimd)
+        schedType = llvm::omp::OMPScheduleType::GuidedSimd;
+      else
+        schedType = llvm::omp::OMPScheduleType::GuidedChunked;
       break;
     case omp::ClauseScheduleKind::Auto:
       schedType = llvm::omp::OMPScheduleType::Auto;
      break;
     case omp::ClauseScheduleKind::Runtime:
-      schedType = llvm::omp::OMPScheduleType::Runtime;
+      if (isSimd)
+        schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
+      else
+        schedType = 
llvm::omp::OMPScheduleType::Runtime; break; default: llvm_unreachable("Unknown schedule value"); break; } + if (loop.schedule_modifiers().hasValue()) { + omp::ScheduleModifier modifier = + *omp::symbolizeScheduleModifier( + loop.schedule_modifiers().getValue()); + switch (modifier) { + case omp::ScheduleModifier::monotonic: + schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; + break; + case omp::ScheduleModifier::nonmonotonic: + schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; + break; + default: + // Nothing to do here. + break; + } + } afterIP = ompBuilder->createDynamicWorkshareLoop( ompLoc, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); } diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 8f7f9c1ca69ca..34c37e52ef1d7 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -176,15 +176,29 @@ func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref) schedule(static) - omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) schedule(static) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) { + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref) schedule(static, none) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) schedule(static, none) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) { + omp.yield + } + + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(static = %{{.*}}, none) collapse(3) ordered(2) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) + schedule(static = %chunk_var, none) collapse(3) { + omp.yield + } + + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, nonmonotonic) collapse(3) ordered(2) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) + schedule(dynamic = %chunk_var, nonmonotonic) collapse(3) { omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(static = %{{.*}}) collapse(3) ordered(2) + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, monotonic) collapse(3) ordered(2) omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) - schedule(static = %chunk_var) collapse(3) { + schedule(dynamic = %chunk_var, monotonic) collapse(3) { omp.yield } diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir 
b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index e9d472d2e602e..34e3a8242b840 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -420,7 +420,7 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) { llvm.func @body(i64) llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -432,7 +432,7 @@ llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () { } llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -444,7 +444,7 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () { } llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -456,7 +456,7 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () { } llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -466,3 +466,27 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () { } llvm.return } + +llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, nonmonotonic) { + // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859 + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +} + +llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, monotonic) { + // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870947 + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +}
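
Taken together, the patch plumbs a schedule modifier from the Fortran SCHEDULE clause, through the omp.wsloop attributes, down to the runtime dispatch value. The sketch below is a standalone illustration, not code from the patch: the enums and helper are stand-ins, and the constants mirror OMPConstants.h as modified above (GuidedChunked = 36 is the pre-existing value for schedule(guided)). It reproduces the kind/modifier selection added to convertOmpWsLoop and the values the new tests check.

#include <cassert>
#include <cstdint>

// Illustrative stand-ins for omp::ClauseScheduleKind / omp::ScheduleModifier
// and the llvm::omp::OMPScheduleType values used in this patch.
enum class Kind { Dynamic, Guided, Auto, Runtime };
enum class Modifier { none, monotonic, nonmonotonic, simd };

constexpr uint64_t DynamicChunked = 35;
constexpr uint64_t GuidedChunked = 36;
constexpr uint64_t Runtime = 37;
constexpr uint64_t Auto = 38;
constexpr uint64_t GuidedSimd = 46;
constexpr uint64_t RuntimeSimd = 47;
constexpr uint64_t ModifierMonotonic = 1u << 29;
constexpr uint64_t ModifierNonmonotonic = 1u << 30;

// Mirrors the logic added to convertOmpWsLoop: pick the base dispatch kind
// (the simd modifier selects the *Simd variants where they exist), then OR in
// the monotonic/nonmonotonic bit if one was given.
uint64_t dispatchScheduleValue(Kind kind, Modifier mod, bool isSimd) {
  uint64_t sched = 0;
  switch (kind) {
  case Kind::Dynamic: sched = DynamicChunked; break;
  case Kind::Guided:  sched = isSimd ? GuidedSimd : GuidedChunked; break;
  case Kind::Auto:    sched = Auto; break;
  case Kind::Runtime: sched = isSimd ? RuntimeSimd : Runtime; break;
  }
  if (mod == Modifier::monotonic)
    sched |= ModifierMonotonic;
  else if (mod == Modifier::nonmonotonic)
    sched |= ModifierNonmonotonic;
  return sched;
}

int main() {
  // schedule(monotonic:dynamic)    -> 536870947  (omp-wsloop-monotonic.f90)
  assert(dispatchScheduleValue(Kind::Dynamic, Modifier::monotonic, false) == 536870947u);
  // schedule(nonmonotonic:dynamic) -> 1073741859 (omp-wsloop-nonmonotonic.f90)
  assert(dispatchScheduleValue(Kind::Dynamic, Modifier::nonmonotonic, false) == 1073741859u);
  // schedule(simd: runtime)        -> 47         (omp-wsloop-simd.f90)
  assert(dispatchScheduleValue(Kind::Runtime, Modifier::none, true) == 47u);
  // schedule(dynamic)              -> 35         (omp-wsloop-dynamic.f90)
  assert(dispatchScheduleValue(Kind::Dynamic, Modifier::none, false) == 35u);
  return 0;
}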