From 235e0f5bcb18ae9d4549c908ae8d050de7d971bf Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Fri, 16 Apr 2021 15:08:56 +0100 Subject: [PATCH 1/5] [OpenMP IRBuilder, MLIR] Add support for OpenMP do schedule dynamic The implementation supports static schedule for Fortran do loops. This implements the dynamic variant of the same concept. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D97393 --- llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index af3f249582de4..9e87dbdd25525 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -119,7 +119,6 @@ enum class OMPScheduleType { ModifierNonmonotonic = (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierNonmonotonic) }; From a4473cbe5a0930795e62daed9547088465900695 Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Mon, 10 May 2021 08:54:41 +0000 Subject: [PATCH 2/5] [OpenMP][MLIR]Add support for guided, auto and runtime scheduling When using the parallel loop construct, the OpenMP specification allows for guided, auto and runtime as scheduling variants (as well as static and dynamic which are already supported). 
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D101435 --- llvm/include/llvm/Frontend/OpenMP/OMPConstants.h | 1 + llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index 9e87dbdd25525..af3f249582de4 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -119,6 +119,7 @@ enum class OMPScheduleType { ModifierNonmonotonic = (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierNonmonotonic) }; diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 962dcc235983d..13c72439f2b6d 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1827,7 +1827,7 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { EXPECT_FALSE(verifyModule(*M, &errs())); } -INSTANTIATE_TEST_SUITE_P(OpenMPWSLoopSchedulingTypes, +INSTANTIATE_TEST_CASE_P(OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams, ::testing::Values(omp::OMPScheduleType::DynamicChunked, omp::OMPScheduleType::GuidedChunked, From b80f391c3880f0b14a4eea3c8dfcb427b9d00835 Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Fri, 30 Apr 2021 14:13:55 +0100 Subject: [PATCH 3/5] [OpenMP]Add support for workshare loop modifier in lowering When lowering the dynamic, guided, auto and runtime types of scheduling, there is an optional monotonic or non-monotonic modifier. This patch adds support in the OMP IR Builder to pass this down to the runtime functions. Also implements tests for the variants. 
Differential Revision: https://reviews.llvm.org/D102008 --- .../llvm/Frontend/OpenMP/OMPConstants.h | 8 +++-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 +-- .../Frontend/OpenMPIRBuilderTest.cpp | 30 +++++++++++++------ 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index af3f249582de4..a05aa231eb516 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -117,10 +117,12 @@ enum class OMPScheduleType { Runtime = 37, Auto = 38, // auto + ModifierMonotonic = + (1 << 29), // Set if the monotonic schedule modifier was present ModifierNonmonotonic = - (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ - - LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierNonmonotonic) + (1 << 30), // Set if the nonmonotonic schedule modifier was present + ModifierMask = ModifierMonotonic | ModifierNonmonotonic, + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask) }; } // end namespace omp diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 74f91e5862e54..43c6dd9ab9972 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1431,10 +1431,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop( Value *ThreadNum = getOrCreateThreadID(SrcLoc); - OMPScheduleType DynamicSchedType = - SchedType | OMPScheduleType::ModifierNonmonotonic; Constant *SchedulingType = - ConstantInt::get(I32Type, static_cast(DynamicSchedType)); + ConstantInt::get(I32Type, static_cast(SchedType)); // Call the "init" function. 
Builder.CreateCall(DynamicInit, diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 13c72439f2b6d..c2da3f30e27c0 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1721,7 +1721,7 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { omp::OMPScheduleType SchedType = GetParam(); uint32_t ChunkSize = 1; - switch (SchedType) { + switch (SchedType & ~omp::OMPScheduleType::ModifierMask) { case omp::OMPScheduleType::DynamicChunked: case omp::OMPScheduleType::GuidedChunked: ChunkSize = 7; @@ -1794,8 +1794,9 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { EXPECT_EQ(InitCall->getCalledFunction()->getName(), "__kmpc_dispatch_init_4u"); EXPECT_EQ(InitCall->getNumArgOperands(), 7U); - EXPECT_EQ(InitCall->getArgOperand(6), - ConstantInt::get(Type::getInt32Ty(Ctx), ChunkSize)); + EXPECT_EQ(InitCall->getArgOperand(6), ConstantInt::get(LCTy, ChunkSize)); + ConstantInt *SchedVal = cast(InitCall->getArgOperand(2)); + EXPECT_EQ(SchedVal->getValue(), static_cast(SchedType)); ConstantInt *OrigLowerBound = dyn_cast(LowerBoundStore->getValueOperand()); @@ -1827,12 +1828,23 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { EXPECT_FALSE(verifyModule(*M, &errs())); } -INSTANTIATE_TEST_CASE_P(OpenMPWSLoopSchedulingTypes, - OpenMPIRBuilderTestWithParams, - ::testing::Values(omp::OMPScheduleType::DynamicChunked, - omp::OMPScheduleType::GuidedChunked, - omp::OMPScheduleType::Auto, - omp::OMPScheduleType::Runtime)); +INSTANTIATE_TEST_CASE_P( + OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams, + ::testing::Values(omp::OMPScheduleType::DynamicChunked, + omp::OMPScheduleType::GuidedChunked, + omp::OMPScheduleType::Auto, omp::OMPScheduleType::Runtime, + omp::OMPScheduleType::DynamicChunked | + omp::OMPScheduleType::ModifierMonotonic, + omp::OMPScheduleType::DynamicChunked | + 
omp::OMPScheduleType::ModifierNonmonotonic, + omp::OMPScheduleType::GuidedChunked | + omp::OMPScheduleType::ModifierMonotonic, + omp::OMPScheduleType::GuidedChunked | + omp::OMPScheduleType::ModifierNonmonotonic, + omp::OMPScheduleType::Auto | + omp::OMPScheduleType::ModifierMonotonic, + omp::OMPScheduleType::Runtime | + omp::OMPScheduleType::ModifierMonotonic)); TEST_F(OpenMPIRBuilderTest, MasterDirective) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; From 9054e8a533a490717107e6682dfc794434c8828f Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Tue, 6 Apr 2021 11:20:49 +0100 Subject: [PATCH 4/5] [Flang][OpenMP]Support for modifiers in workshare loops Pass the modifiers from the Flang parser to FIR/MLIR workshare loop operation. Not yet supporting the SIMD modifier, which is a bit more work than just adding it to the list of modifiers, so will go in a separate patch. This adds a new field to the WsLoopOp. Also add test for dynamic WSLoop, checking that dynamic schedule calls the init and next functions as expected. 
--- flang/lib/Lower/OpenMP.cpp | 32 ++++++ .../test/Lower/OpenMP/omp-wsloop-dynamic.f90 | 102 ++++++++++++++++++ .../Lower/OpenMP/omp-wsloop-monotonic.f90 | 100 +++++++++++++++++ .../Lower/OpenMP/omp-wsloop-nonmonotonic.f90 | 102 ++++++++++++++++++ flang/test/Lower/OpenMP/omp-wsloop.f90 | 6 +- .../Frontend/OpenMPIRBuilderTest.cpp | 2 +- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 13 +++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 20 +++- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 16 +++ mlir/test/Dialect/OpenMP/ops.mlir | 22 +++- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 32 +++++- 11 files changed, 435 insertions(+), 12 deletions(-) create mode 100644 flang/test/Lower/OpenMP/omp-wsloop-dynamic.f90 create mode 100644 flang/test/Lower/OpenMP/omp-wsloop-monotonic.f90 create mode 100644 flang/test/Lower/OpenMP/omp-wsloop-nonmonotonic.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 5438e6cc7dcb7..4c97982334fc9 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -336,6 +336,35 @@ genOMP(Fortran::lower::AbstractConverter &converter, } } +static mlir::omp::ScheduleModifier +translateModifier(const Fortran::parser::OmpScheduleModifierType &m) { + switch (m.v) { + case Fortran::parser::OmpScheduleModifierType::ModType::Monotonic: + return mlir::omp::ScheduleModifier::monotonic; + case Fortran::parser::OmpScheduleModifierType::ModType::Nonmonotonic: + return mlir::omp::ScheduleModifier::nonmonotonic; + default: + llvm_unreachable("Unknown case"); + } + return mlir::omp::ScheduleModifier::none; +} + +static mlir::omp::ScheduleModifier +getScheduleModifiers(const Fortran::parser::OmpScheduleClause &x) { + const auto &modifier = + std::get>(x.t); + if (modifier) { + const auto &modType1 = + std::get(modifier->t); + // TODO: Add support for SIMD, which means modType2 gets used. 
+ // const auto &modType2 = std::get< + // std::optional>( + // modifier->t); + return translateModifier(modType1.v); + } + return mlir::omp::ScheduleModifier::none; +} + static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { @@ -479,6 +508,9 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, omp::ClauseScheduleKind::Runtime))); break; } + wsLoopOp.schedule_modifiersAttr( + firOpBuilder.getStringAttr(omp::stringifyScheduleModifier( + getScheduleModifiers(scheduleClause->v)))); } } // In FORTRAN `nowait` clause occur at the end of `omp do` directive. diff --git a/flang/test/Lower/OpenMP/omp-wsloop-dynamic.f90 b/flang/test/Lower/OpenMP/omp-wsloop-dynamic.f90 new file mode 100644 index 0000000000000..5ede05abe7235 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-dynamic.f90 @@ -0,0 +1,102 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: FileCheck %s --check-prefix=FIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco --disable-llvm --print-ir-after=fir-to-llvm-ir 2>&1 | \ +! RUN: FileCheck %s --check-prefix=LLVMIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! 
RUN: tco | FileCheck %s --check-prefix=LLVMIR + +program wsloop_dynamic + integer :: i +!FIRDialect: func @_QQmain() +!LLVMIRDialect: func @_QQmain() + +!LLVMIR: define void @_QQmain() +!LLVMIR:call i32 @__kmpc_global_thread_num{{.*}} +!LLVMIR: br label %omp_parallel + +!$OMP PARALLEL +!FIRDialect-LABLEL: omp.parallel { +!LLVMIRDialect-LABLEL: omp.parallel { + +!LLVMIR: omp_parallel: ; preds = %0 +!LLVMIR: @__kmpc_fork_call +!$OMP DO SCHEDULE(dynamic) +!FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 +!FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 +!FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(dynamic, none) nowait inclusive + +!LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 +!LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(dynamic, none) nowait inclusive + +!LLVMIR: define internal void @_QQmain..omp_par +!LLVMIR: omp.par.entry: +!LLVMIR: br label %omp.par.region +!LLVMIR: omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +!LLVMIR: ret void +!LLVMIR: omp.par.region: ; preds = %omp.par.entry +!LLVMIR: br label %omp.par.region1 +!LLVMIR: omp.par.region1: ; preds = %omp.par.region +!LLVMIR: br label %omp_loop.preheader +!LLVMIR: omp_loop.preheader: ; preds = %omp.par.region1 +!LLVMIR: @__kmpc_global_thread_num +!LLVMIR: @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %omp_global_thread_num{{.*}}, i32 35, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) +!LLVMIR: br label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.preheader.outer.cond: +!LLVMIR: @__kmpc_dispatch_next_4u +!LLVMIR: %{{.*}} = icmp ne i32 %{{.*}}, 0 +!LLVMIR: %{{.*}} = load i32, i32* %p.lowerbound, align 4 +!LLVMIR: %{{.*}} = sub i32 %{{.*}}, 1 +!LLVMIR: br i1 %{{.*}}, label %omp_loop.header, label %omp_loop.exit +!LLVMIR: 
omp_loop.exit: ; preds = %omp_loop.preheader.outer.cond +!LLVMIR: br label %omp_loop.after +!LLVMIR: omp_loop.header: ; preds = %omp_loop.preheader.outer.cond, %omp_loop.inc +!LLVMIR: %omp_loop.iv = phi i32 [ %lb, %omp_loop.preheader.outer.cond ], [ %omp_loop.next, %omp_loop.inc ] + +do i=1, 9 +print*, i +!FIRDialect: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!FIRDialect: %[[CONVERTED:.*]] = fir.convert %[[I]] : (i32) -> i64 +!FIRDialect: fir.call @_FortranAioOutputInteger64(%[[RTBEGIN]], %[[CONVERTED]]) : (!fir.ref, i64) -> i1 +!FIRDialect: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) : (!fir.ref) -> i32 + + +!LLVMIRDialect: llvm.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) : (i32, !llvm.ptr, i32) -> !llvm.ptr +!LLVMIRDialect: %{{.*}} = llvm.sext %arg0 : i32 to i64 +!LLVMIRDialect: llvm.call @_FortranAioOutputInteger64(%{{.*}}, %{{.*}}) : (!llvm.ptr, i64) -> i1 +!LLVMIRDialect: llvm.call @_FortranAioEndIoStatement(%{{.*}}) : (!llvm.ptr) -> i32 + +!LLVMIR: br label %omp_loop.cond +!LLVMIR: omp_loop.cond: ; preds = %omp_loop.header +!LLVMIR %{{.*}} = load i32, i32* %{{.*}}, aling {{.*}} +!LLVMIR: %omp_loop.cmp = icmp ult i32 %{{.*}}, %{{.*}} +!LLVMIR: br i1 %omp_loop.cmp, label %omp_loop.body, label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.body: ; preds = %omp_loop.cond +!LLVMIR: %{{.*}} = mul i32 %{{.*}}, 1 +!LLVMIR: %{{.*}} = add i32 %{{.*}}, 1 +!LLVMIR: br label %omp.wsloop.region +!LLVMIR: omp.wsloop.region: ; preds = %omp_loop.body +!LLVMIR: %{{.*}} = call i8* @_FortranAioBeginExternalListOutput +!LLVMIR: %{{.*}} = sext i32 %{{.*}} to i64 +!LLVMIR: %{{.*}} = call i1 @_FortranAioOutputInteger64 +!LLVMIR: %{{.*}} = call i32 @_FortranAioEndIoStatement + +end do +!FIRDialect: omp.yield +!FIRDialect: } +!FIRDialect: omp.terminator +!FIRDialect: } + +!LLVMIRDialect: omp.yield +!LLVMIRDialect: } +!LLVMIRDialect: omp.terminator +!LLVMIRDialect: } +!LLVMIRDialect: llvm.return +!LLVMIRDialect: } +!$OMP END DO 
NOWAIT +!$OMP END PARALLEL +end diff --git a/flang/test/Lower/OpenMP/omp-wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/omp-wsloop-monotonic.f90 new file mode 100644 index 0000000000000..6e8c82bf05be8 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-monotonic.f90 @@ -0,0 +1,100 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: FileCheck %s --check-prefix=FIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco --disable-llvm --print-ir-after=fir-to-llvm-ir 2>&1 | \ +! RUN: FileCheck %s --check-prefix=LLVMIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco | FileCheck %s --check-prefix=LLVMIR + +program wsloop_dynamic + integer :: i +!FIRDialect: func @_QQmain() +!LLVMIRDialect: func @_QQmain() + +!LLVMIR: define void @_QQmain() +!LLVMIR:call i32 @__kmpc_global_thread_num{{.*}} +!LLVMIR: br label %omp_parallel + +!$OMP PARALLEL +!FIRDialect-LABLEL: omp.parallel { +!LLVMIRDialect-LABLEL: omp.parallel { + +!LLVMIR: omp_parallel: ; preds = %0 +!LLVMIR: @__kmpc_fork_call +!$OMP DO SCHEDULE(monotonic:dynamic) +!FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 +!FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 +!FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(dynamic, monotonic) nowait inclusive + +!LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 +!LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(dynamic, monotonic) nowait inclusive + +!LLVMIR: define internal void @_QQmain..omp_par +!LLVMIR: omp.par.entry: +!LLVMIR: br label %omp.par.region +!LLVMIR: omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +!LLVMIR: ret void +!LLVMIR: omp.par.region: ; preds = %omp.par.entry +!LLVMIR: br label %omp.par.region1 +!LLVMIR: 
omp.par.region1: ; preds = %omp.par.region +!LLVMIR: br label %omp_loop.preheader +!LLVMIR: omp_loop.preheader: ; preds = %omp.par.region1 +!LLVMIR: @__kmpc_global_thread_num +!LLVMIR: @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %omp_global_thread_num{{.*}}, i32 536870947, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) +!LLVMIR: br label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.preheader.outer.cond: +!LLVMIR: @__kmpc_dispatch_next_4u +!LLVMIR: %{{.*}} = icmp ne i32 %{{.*}}, 0 +!LLVMIR: %{{.*}} = load i32, i32* %p.lowerbound, align 4 +!LLVMIR: %{{.*}} = sub i32 %{{.*}}, 1 +!LLVMIR: br i1 %{{.*}}, label %omp_loop.header, label %omp_loop.exit +!LLVMIR: omp_loop.exit: ; preds = %omp_loop.preheader.outer.cond +!LLVMIR: br label %omp_loop.after +!LLVMIR: omp_loop.header: ; preds = %omp_loop.preheader.outer.cond, %omp_loop.inc +!LLVMIR: %omp_loop.iv = phi i32 [ %lb, %omp_loop.preheader.outer.cond ], [ %omp_loop.next, %omp_loop.inc ] + +do i=1, 9 +print*, i +!FIRDialect: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!FIRDialect: %[[CONVERTED:.*]] = fir.convert %[[I]] : (i32) -> i64 +!FIRDialect: fir.call @_FortranAioOutputInteger64(%[[RTBEGIN]], %[[CONVERTED]]) : (!fir.ref, i64) -> i1 +!FIRDialect: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) : (!fir.ref) -> i32 + + +!LLVMIRDialect: llvm.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) : (i32, !llvm.ptr, i32) -> !llvm.ptr +!LLVMIRDialect: %{{.*}} = llvm.sext %[[I]] : i32 to i64 +!LLVMIRDialect: llvm.call @_FortranAioOutputInteger64(%{{.*}}, %{{.*}}) : (!llvm.ptr, i64) -> i1 +!LLVMIRDialect: llvm.call @_FortranAioEndIoStatement(%{{.*}}) : (!llvm.ptr) -> i32 + +!LLVMIR: br label %omp_loop.cond +!LLVMIR: omp_loop.cond: ; preds = %omp_loop.header +!LLVMIR %{{.*}} = load i32, i32* %{{.*}}, aling {{.*}} +!LLVMIR: %omp_loop.cmp = icmp ult i32 %{{.*}}, %{{.*}} +!LLVMIR: br i1 %omp_loop.cmp, label %omp_loop.body, label %omp_loop.preheader.outer.cond +!LLVMIR: 
omp_loop.body: ; preds = %omp_loop.cond +!LLVMIR: %{{.*}} = mul i32 %{{.*}}, 1 +!LLVMIR: %{{.*}} = add i32 %{{.*}}, 1 +!LLVMIR: br label %omp.wsloop.region +!LLVMIR: omp.wsloop.region: ; preds = %omp_loop.body +!LLVMIR: %{{.*}} = call i8* @_FortranAioBeginExternalListOutput +!LLVMIR: %{{.*}} = sext i32 %{{.*}} to i64 +!LLVMIR: %{{.*}} = call i1 @_FortranAioOutputInteger64 +!LLVMIR: %{{.*}} = call i32 @_FortranAioEndIoStatement + +end do +!FIRDialect: omp.yield +!FIRDialect: omp.terminator +!FIRDialect: } + +!LLVMIRDialect: omp.yield +!LLVMIRDialect: omp.terminator +!LLVMIRDialect: } +!LLVMIRDialect: llvm.return +!LLVMIRDialect: } +!$OMP END DO NOWAIT +!$OMP END PARALLEL +end diff --git a/flang/test/Lower/OpenMP/omp-wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/omp-wsloop-nonmonotonic.f90 new file mode 100644 index 0000000000000..8df278fcbd665 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-nonmonotonic.f90 @@ -0,0 +1,102 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: FileCheck %s --check-prefix=FIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco --disable-llvm --print-ir-after=fir-to-llvm-ir 2>&1 | \ +! RUN: FileCheck %s --check-prefix=LLVMIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! 
RUN: tco | FileCheck %s --check-prefix=LLVMIR + +program wsloop_dynamic + integer :: i +!FIRDialect: func @_QQmain() +!LLVMIRDialect: func @_QQmain() + +!LLVMIR: define void @_QQmain() +!LLVMIR:call i32 @__kmpc_global_thread_num{{.*}} +!LLVMIR: br label %omp_parallel + +!$OMP PARALLEL +!FIRDialect-LABLEL: omp.parallel { +!LLVMIRDialect-LABLEL: omp.parallel { + +!LLVMIR: omp_parallel: ; preds = %0 +!LLVMIR: @__kmpc_fork_call +!$OMP DO SCHEDULE(nonmonotonic:dynamic) +!FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 +!FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 +!FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(dynamic, nonmonotonic) nowait inclusive + +!LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 +!LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(dynamic, nonmonotonic) nowait inclusive + +!LLVMIR: define internal void @_QQmain..omp_par +!LLVMIR: omp.par.entry: +!LLVMIR: br label %omp.par.region +!LLVMIR: omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +!LLVMIR: ret void +!LLVMIR: omp.par.region: ; preds = %omp.par.entry +!LLVMIR: br label %omp.par.region1 +!LLVMIR: omp.par.region1: ; preds = %omp.par.region +!LLVMIR: br label %omp_loop.preheader +!LLVMIR: omp_loop.preheader: ; preds = %omp.par.region1 +!LLVMIR: @__kmpc_global_thread_num +!LLVMIR: @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %omp_global_thread_num{{.*}}, i32 1073741859, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) +!LLVMIR: br label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.preheader.outer.cond: +!LLVMIR: @__kmpc_dispatch_next_4u +!LLVMIR: %{{.*}} = icmp ne i32 %{{.*}}, 0 +!LLVMIR: %{{.*}} = load i32, i32* %p.lowerbound, align 4 +!LLVMIR: %{{.*}} = sub i32 %{{.*}}, 1 +!LLVMIR: br i1 %{{.*}}, label %omp_loop.header, 
label %omp_loop.exit +!LLVMIR: omp_loop.exit: ; preds = %omp_loop.preheader.outer.cond +!LLVMIR: br label %omp_loop.after +!LLVMIR: omp_loop.header: ; preds = %omp_loop.preheader.outer.cond, %omp_loop.inc +!LLVMIR: %omp_loop.iv = phi i32 [ %lb, %omp_loop.preheader.outer.cond ], [ %omp_loop.next, %omp_loop.inc ] + +do i=1, 9 +print*, i +!FIRDialect: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!FIRDialect: %[[CONVERTED:.*]] = fir.convert %[[I]] : (i32) -> i64 +!FIRDialect: fir.call @_FortranAioOutputInteger64(%[[RTBEGIN]], %[[CONVERTED]]) : (!fir.ref, i64) -> i1 +!FIRDialect: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) : (!fir.ref) -> i32 + + +!LLVMIRDialect: llvm.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) : (i32, !llvm.ptr, i32) -> !llvm.ptr +!LLVMIRDialect: %{{.*}} = llvm.sext %arg0 : i32 to i64 +!LLVMIRDialect: llvm.call @_FortranAioOutputInteger64(%{{.*}}, %{{.*}}) : (!llvm.ptr, i64) -> i1 +!LLVMIRDialect: llvm.call @_FortranAioEndIoStatement(%{{.*}}) : (!llvm.ptr) -> i32 + +!LLVMIR: br label %omp_loop.cond +!LLVMIR: omp_loop.cond: ; preds = %omp_loop.header +!LLVMIR %{{.*}} = load i32, i32* %{{.*}}, aling {{.*}} +!LLVMIR: %omp_loop.cmp = icmp ult i32 %{{.*}}, %{{.*}} +!LLVMIR: br i1 %omp_loop.cmp, label %omp_loop.body, label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.body: ; preds = %omp_loop.cond +!LLVMIR: %{{.*}} = mul i32 %{{.*}}, 1 +!LLVMIR: %{{.*}} = add i32 %{{.*}}, 1 +!LLVMIR: br label %omp.wsloop.region +!LLVMIR: omp.wsloop.region: ; preds = %omp_loop.body +!LLVMIR: %{{.*}} = call i8* @_FortranAioBeginExternalListOutput +!LLVMIR: %{{.*}} = sext i32 %{{.*}} to i64 +!LLVMIR: %{{.*}} = call i1 @_FortranAioOutputInteger64 +!LLVMIR: %{{.*}} = call i32 @_FortranAioEndIoStatement + +end do +!FIRDialect: omp.yield +!FIRDialect: } +!FIRDialect: omp.terminator +!FIRDialect: } + +!LLVMIRDialect: omp.yield +!LLVMIRDialect: } +!LLVMIRDialect: omp.terminator +!LLVMIRDialect: } +!LLVMIRDialect: llvm.return 
+!LLVMIRDialect: } +!$OMP END DO NOWAIT +!$OMP END PARALLEL +end diff --git a/flang/test/Lower/OpenMP/omp-wsloop.f90 b/flang/test/Lower/OpenMP/omp-wsloop.f90 index 640d657e9010d..216f6259a7f5f 100644 --- a/flang/test/Lower/OpenMP/omp-wsloop.f90 +++ b/flang/test/Lower/OpenMP/omp-wsloop.f90 @@ -27,11 +27,11 @@ program wsloop !FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 !FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 !FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 -!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(static) nowait inclusive +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(static, none) nowait inclusive !LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 !LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 -!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(static) nowait inclusive +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(static, none) nowait inclusive !LLVMIR: define internal void @_QQmain..omp_par !LLVMIR: omp.par.entry: @@ -81,10 +81,12 @@ program wsloop end do !FIRDialect: omp.yield +!FIRDialect: } !FIRDialect: omp.terminator !FIRDialect: } !LLVMIRDialect: omp.yield +!LLVMIRDialect: } !LLVMIRDialect: omp.terminator !LLVMIRDialect: } !LLVMIRDialect: llvm.return diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index c2da3f30e27c0..c9e4bed4154be 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -1828,7 +1828,7 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { EXPECT_FALSE(verifyModule(*M, &errs())); } -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( OpenMPWSLoopSchedulingTypes, OpenMPIRBuilderTestWithParams, 
::testing::Values(omp::OMPScheduleType::DynamicChunked, omp::OMPScheduleType::GuidedChunked, diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index be1e9915484ba..defb6f8dd03bf 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -114,6 +114,18 @@ def TerminatorOp : OpenMP_Op<"terminator", [Terminator]> { let assemblyFormat = "attr-dict"; } +def OMP_SCHEDULE_MOD_None : StrEnumAttrCase<"none", 0>; +def OMP_SCHEDULE_MOD_Monotonic : StrEnumAttrCase<"monotonic", 1>; +def OMP_SCHEDULE_MOD_Nonmonotonic : StrEnumAttrCase<"nonmonotonic", 2>; + +def ScheduleModifier : StrEnumAttr<"ScheduleModifier", "OpenMP Schedule Modifier", + [OMP_SCHEDULE_MOD_None, + OMP_SCHEDULE_MOD_Monotonic, + OMP_SCHEDULE_MOD_Nonmonotonic]> +{ + let cppNamespace = "::mlir::omp"; +} + //===----------------------------------------------------------------------===// // 2.9.2 Workshare Loop Construct //===----------------------------------------------------------------------===// @@ -178,6 +190,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, Variadic:$linear_step_vars, OptionalAttr:$schedule_val, Optional:$schedule_chunk_var, + OptionalAttr:$schedule_modifiers, Confined, [IntMinValue<0>]>:$collapse_val, UnitAttr:$nowait, Confined, [IntMinValue<0>]>:$ordered_val, diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 3f689ebc0e023..47e51935d54e6 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -413,6 +413,7 @@ parseLinearClause(OpAsmParser &parser, /// sched-wo-chunk ::= `auto` | `runtime` static ParseResult parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule, + SmallVector> &modifiers, Optional &chunkSize) { if (parser.parseLParen()) return failure(); @@ -436,6 +437,14 @@ parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule, 
return parser.emitError(parser.getNameLoc()) << " expected schedule kind"; } + // If there is a comma, we have one or more modifiers. + if (succeeded(parser.parseOptionalComma())) { + StringRef mod; + if (parser.parseKeyword(&mod)) + return failure(); + modifiers.push_back(mod); + } + if (parser.parseRParen()) return failure(); @@ -507,6 +516,7 @@ static ParseResult parseWsLoopOp(OpAsmParser &parser, OperationState &result) { SmallVector linearTypes; SmallVector linearSteps; SmallString<8> schedule; + SmallVector> modifiers; Optional scheduleChunkSize; std::array segments{numIVs, numIVs, numIVs, 0, 0, 0, 0, 0, 0}; @@ -557,7 +567,7 @@ static ParseResult parseWsLoopOp(OpAsmParser &parser, OperationState &result) { } else if (keyword == "schedule") { if (!schedule.empty()) return allowedOnce(parser, "schedule", opName); - if (parseScheduleClause(parser, schedule, scheduleChunkSize)) + if (parseScheduleClause(parser, schedule, modifiers, scheduleChunkSize)) return failure(); if (scheduleChunkSize) { segments[scheduleClausePos] = 1; @@ -626,6 +636,11 @@ static ParseResult parseWsLoopOp(OpAsmParser &parser, OperationState &result) { schedule[0] = llvm::toUpper(schedule[0]); auto attr = parser.getBuilder().getStringAttr(schedule); result.addAttribute("schedule_val", attr); + if (modifiers.size() > 0) + { + auto mod = parser.getBuilder().getStringAttr(modifiers[0]); + result.addAttribute("schedule_modifiers", mod); + } if (scheduleChunkSize) { auto chunkSizeType = parser.getBuilder().getI32Type(); parser.resolveOperand(*scheduleChunkSize, chunkSizeType, result.operands); @@ -684,6 +699,9 @@ static void printWsLoopOp(OpAsmPrinter &p, WsLoopOp op) { if (auto chunk = op.schedule_chunk_var()) { p << " = " << chunk; } + if (auto modifier = op.schedule_modifiers()) { + p << ", " << modifier; + } p << ")"; } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 
6259612d5112b..09d1b4387c654 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -299,6 +299,22 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, break; } + if (loop.schedule_modifiers().hasValue()) { + omp::ScheduleModifier modifier = + *omp::symbolizeScheduleModifier( + loop.schedule_modifiers().getValue()); + switch (modifier) { + case omp::ScheduleModifier::monotonic: + schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; + break; + case omp::ScheduleModifier::nonmonotonic: + schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; + break; + default: + // Nothing to do here. + break; + } + } afterIP = ompBuilder->createDynamicWorkshareLoop( ompLoc, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); } diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 8f7f9c1ca69ca..34c37e52ef1d7 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -176,15 +176,29 @@ func @omp_wsloop_pretty(%lb : index, %ub : index, %step : index, omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref) schedule(static) - omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) schedule(static) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) { + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref) schedule(static, none) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) schedule(static, none) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) { + omp.yield + } + + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(static = %{{.*}}, none) 
collapse(3) ordered(2) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) + schedule(static = %chunk_var, none) collapse(3) { + omp.yield + } + + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, nonmonotonic) collapse(3) ordered(2) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) + schedule(dynamic = %chunk_var, nonmonotonic) collapse(3) { omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(static = %{{.*}}) collapse(3) ordered(2) + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, monotonic) collapse(3) ordered(2) omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) - schedule(static = %chunk_var) collapse(3) { + schedule(dynamic = %chunk_var, monotonic) collapse(3) { omp.yield } diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index e9d472d2e602e..34e3a8242b840 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -420,7 +420,7 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) { llvm.func @body(i64) 
llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -432,7 +432,7 @@ llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () { } llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -444,7 +444,7 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () { } llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -456,7 +456,7 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () { } llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -466,3 +466,27 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () { } llvm.return } + +llvm.func 
@test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, nonmonotonic) { + // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859 + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +} + +llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, monotonic) { + // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870947 + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +} From cba722241a36b1f987d7f89ae477c577642f26c2 Mon Sep 17 00:00:00 2001 From: Mats Petersson Date: Wed, 26 May 2021 10:35:45 +0100 Subject: [PATCH 5/5] [Flang][OpenMP] Add support for SIMD modifier Add support for SIMD modifier in OpenMP worksharing loops. 
--- flang/lib/Lower/OpenMP.cpp | 46 ++++++-- flang/test/Lower/OpenMP/omp-wsloop-simd.f90 | 102 ++++++++++++++++++ .../llvm/Frontend/OpenMP/OMPConstants.h | 4 + mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 5 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 19 ++-- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 17 ++- 6 files changed, 178 insertions(+), 15 deletions(-) create mode 100644 flang/test/Lower/OpenMP/omp-wsloop-simd.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 4c97982334fc9..bf1d1b0155a58 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -343,8 +343,8 @@ translateModifier(const Fortran::parser::OmpScheduleModifierType &m) { return mlir::omp::ScheduleModifier::monotonic; case Fortran::parser::OmpScheduleModifierType::ModType::Nonmonotonic: return mlir::omp::ScheduleModifier::nonmonotonic; - default: - llvm_unreachable("Unknown case"); + case Fortran::parser::OmpScheduleModifierType::ModType::Simd: + return mlir::omp::ScheduleModifier::simd; } return mlir::omp::ScheduleModifier::none; } @@ -353,18 +353,50 @@ static mlir::omp::ScheduleModifier getScheduleModifiers(const Fortran::parser::OmpScheduleClause &x) { const auto &modifier = std::get>(x.t); + // The input may have the modifiers in any order, so we look for one that + // isn't SIMD. If only SIMD is given, or no modifier is set at all, return + // "none". if (modifier) { const auto &modType1 = std::get(modifier->t); - // TODO: Add support for SIMD, which means modType2 gets used.
- // const auto &modType2 = std::get< - // std::optional>( - // modifier->t); + if (modType1.v.v == + Fortran::parser::OmpScheduleModifierType::ModType::Simd) { + const auto &modType2 = std::get< + std::optional>( + modifier->t); + if (modType2 && modType2->v.v != + Fortran::parser::OmpScheduleModifierType::ModType::Simd) + return translateModifier(modType2->v); + return mlir::omp::ScheduleModifier::none; + } return translateModifier(modType1.v); } return mlir::omp::ScheduleModifier::none; } +static mlir::omp::ScheduleModifier +getSIMDModifier(const Fortran::parser::OmpScheduleClause &x) { + const auto &modifier = + std::get>(x.t); + // Either of the two possible modifiers in the input can be the SIMD modifier, + // so look in both (the second one is optional) and return simd if we find + // it. Not found = return "none". + if (modifier) { + const auto &modType1 = + std::get(modifier->t); + if (modType1.v.v == Fortran::parser::OmpScheduleModifierType::ModType::Simd) + return mlir::omp::ScheduleModifier::simd; + + const auto &modType2 = std::get< + std::optional>( + modifier->t); + if (modType2 && modType2->v.v == + Fortran::parser::OmpScheduleModifierType::ModType::Simd) + return mlir::omp::ScheduleModifier::simd; + } + return mlir::omp::ScheduleModifier::none; +} + static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { @@ -511,6 +543,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, wsLoopOp.schedule_modifiersAttr( firOpBuilder.getStringAttr(omp::stringifyScheduleModifier( getScheduleModifiers(scheduleClause->v)))); + wsLoopOp.simd_modifierAttr(firOpBuilder.getStringAttr( + omp::stringifyScheduleModifier(getSIMDModifier(scheduleClause->v)))); } } // In FORTRAN `nowait` clause occur at the end of `omp do` directive.
diff --git a/flang/test/Lower/OpenMP/omp-wsloop-simd.f90 b/flang/test/Lower/OpenMP/omp-wsloop-simd.f90 new file mode 100644 index 0000000000000..fde0e29b2114e --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-wsloop-simd.f90 @@ -0,0 +1,102 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing). + +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: FileCheck %s --check-prefix=FIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco --disable-llvm --print-ir-after=fir-to-llvm-ir 2>&1 | \ +! RUN: FileCheck %s --check-prefix=LLVMIRDialect +! RUN: bbc -fopenmp -emit-fir %s -o - | \ +! RUN: tco | FileCheck %s --check-prefix=LLVMIR + +program wsloop_dynamic + integer :: i +!FIRDialect: func @_QQmain() +!LLVMIRDialect: func @_QQmain() + +!LLVMIR: define void @_QQmain() +!LLVMIR:call i32 @__kmpc_global_thread_num{{.*}} +!LLVMIR: br label %omp_parallel + +!$OMP PARALLEL +!FIRDialect-LABLEL: omp.parallel { +!LLVMIRDialect-LABLEL: omp.parallel { + +!LLVMIR: omp_parallel: ; preds = %0 +!LLVMIR: @__kmpc_fork_call +!$OMP DO SCHEDULE(simd: runtime) +!FIRDialect: %[[WS_LB:.*]] = constant 1 : i32 +!FIRDialect: %[[WS_UB:.*]] = constant 9 : i32 +!FIRDialect: %[[WS_STEP:.*]] = constant 1 : i32 +!FIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) step (%[[WS_STEP]]) schedule(runtime, none, simd) nowait inclusive + +!LLVMIRDialect: %[[WS_UB:.*]] = llvm.mlir.constant(9 : i32) : i32 +!LLVMIRDialect: %[[WS_LB_STEP:.*]] = llvm.mlir.constant(1 : i32) : i32 +!LLVMIRDialect: omp.wsloop (%[[I:.*]]) : i32 = (%[[WS_LB_STEP]]) to (%[[WS_UB]]) step (%[[WS_LB_STEP]]) schedule(runtime, none, simd) nowait inclusive + +!LLVMIR: define internal void @_QQmain..omp_par +!LLVMIR: omp.par.entry: +!LLVMIR: br label %omp.par.region +!LLVMIR: omp.par.outlined.exit.exitStub: ; preds = %omp.par.pre_finalize +!LLVMIR: ret void +!LLVMIR: omp.par.region: ; preds = %omp.par.entry +!LLVMIR: br label %omp.par.region1 +!LLVMIR: omp.par.region1: ; preds = %omp.par.region +!LLVMIR: 
br label %omp_loop.preheader +!LLVMIR: omp_loop.preheader: ; preds = %omp.par.region1 +!LLVMIR: @__kmpc_global_thread_num +!LLVMIR: @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %omp_global_thread_num{{.*}}, i32 47, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}) +!LLVMIR: br label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.preheader.outer.cond: +!LLVMIR: @__kmpc_dispatch_next_4u +!LLVMIR: %{{.*}} = icmp ne i32 %{{.*}}, 0 +!LLVMIR: %{{.*}} = load i32, i32* %p.lowerbound, align 4 +!LLVMIR: %{{.*}} = sub i32 %{{.*}}, 1 +!LLVMIR: br i1 %{{.*}}, label %omp_loop.header, label %omp_loop.exit +!LLVMIR: omp_loop.exit: ; preds = %omp_loop.preheader.outer.cond +!LLVMIR: br label %omp_loop.after +!LLVMIR: omp_loop.header: ; preds = %omp_loop.preheader.outer.cond, %omp_loop.inc +!LLVMIR: %omp_loop.iv = phi i32 [ %lb, %omp_loop.preheader.outer.cond ], [ %omp_loop.next, %omp_loop.inc ] + +do i=1, 9 +print*, i +!FIRDialect: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput +!FIRDialect: %[[CONVERTED:.*]] = fir.convert %[[I]] : (i32) -> i64 +!FIRDialect: fir.call @_FortranAioOutputInteger64(%[[RTBEGIN]], %[[CONVERTED]]) : (!fir.ref, i64) -> i1 +!FIRDialect: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) : (!fir.ref) -> i32 + + +!LLVMIRDialect: llvm.call @_FortranAioBeginExternalListOutput(%{{.*}}, %{{.*}}, %{{.*}}) : (i32, !llvm.ptr, i32) -> !llvm.ptr +!LLVMIRDialect: %{{.*}} = llvm.sext %arg0 : i32 to i64 +!LLVMIRDialect: llvm.call @_FortranAioOutputInteger64(%{{.*}}, %{{.*}}) : (!llvm.ptr, i64) -> i1 +!LLVMIRDialect: llvm.call @_FortranAioEndIoStatement(%{{.*}}) : (!llvm.ptr) -> i32 + +!LLVMIR: br label %omp_loop.cond +!LLVMIR: omp_loop.cond: ; preds = %omp_loop.header +!LLVMIR %{{.*}} = load i32, i32* %{{.*}}, aling {{.*}} +!LLVMIR: %omp_loop.cmp = icmp ult i32 %{{.*}}, %{{.*}} +!LLVMIR: br i1 %omp_loop.cmp, label %omp_loop.body, label %omp_loop.preheader.outer.cond +!LLVMIR: omp_loop.body: ; preds = %omp_loop.cond +!LLVMIR: %{{.*}} = mul i32 
%{{.*}}, 1 +!LLVMIR: %{{.*}} = add i32 %{{.*}}, 1 +!LLVMIR: br label %omp.wsloop.region +!LLVMIR: omp.wsloop.region: ; preds = %omp_loop.body +!LLVMIR: %{{.*}} = call i8* @_FortranAioBeginExternalListOutput +!LLVMIR: %{{.*}} = sext i32 %{{.*}} to i64 +!LLVMIR: %{{.*}} = call i1 @_FortranAioOutputInteger64 +!LLVMIR: %{{.*}} = call i32 @_FortranAioEndIoStatement + +end do +!FIRDialect: omp.yield +!FIRDialect: } +!FIRDialect: omp.terminator +!FIRDialect: } + +!LLVMIRDialect: omp.yield +!LLVMIRDialect: } +!LLVMIRDialect: omp.terminator +!LLVMIRDialect: } +!LLVMIRDialect: llvm.return +!LLVMIRDialect: } +!$OMP END DO NOWAIT +!$OMP END PARALLEL +end diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index a05aa231eb516..1553446429e58 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -117,6 +117,10 @@ enum class OMPScheduleType { Runtime = 37, Auto = 38, // auto + StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd) + GuidedSimd = 46, // guided with chunk adjustment + RuntimeSimd = 47, // runtime with chunk adjustment + ModifierMonotonic = (1 << 29), // Set if the monotonic schedule modifier was present ModifierNonmonotonic = diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index defb6f8dd03bf..65abdfee63088 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -117,11 +117,13 @@ def TerminatorOp : OpenMP_Op<"terminator", [Terminator]> { def OMP_SCHEDULE_MOD_None : StrEnumAttrCase<"none", 0>; def OMP_SCHEDULE_MOD_Monotonic : StrEnumAttrCase<"monotonic", 1>; def OMP_SCHEDULE_MOD_Nonmonotonic : StrEnumAttrCase<"nonmonotonic", 2>; +def OMP_SCHEDULE_MOD_SIMD : StrEnumAttrCase<"simd", 3>; def ScheduleModifier : StrEnumAttr<"ScheduleModifier", "OpenMP Schedule Modifier", [OMP_SCHEDULE_MOD_None, 
OMP_SCHEDULE_MOD_Monotonic, - OMP_SCHEDULE_MOD_Nonmonotonic]> + OMP_SCHEDULE_MOD_Nonmonotonic, + OMP_SCHEDULE_MOD_SIMD]> { let cppNamespace = "::mlir::omp"; } @@ -191,6 +193,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, OptionalAttr:$schedule_val, Optional:$schedule_chunk_var, OptionalAttr:$schedule_modifiers, + OptionalAttr:$simd_modifier, Confined, [IntMinValue<0>]>:$collapse_val, UnitAttr:$nowait, Confined, [IntMinValue<0>]>:$ordered_val, diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 47e51935d54e6..3d74ddd1790cd 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -413,7 +413,7 @@ parseLinearClause(OpAsmParser &parser, /// sched-wo-chunk ::= `auto` | `runtime` static ParseResult parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule, - SmallVector> &modifiers, + SmallVectorImpl> &modifiers, Optional &chunkSize) { if (parser.parseLParen()) return failure(); @@ -438,7 +438,7 @@ parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule, } // If there is a comma, we have one or more modifiers.. 
- if (succeeded(parser.parseOptionalComma())) { + while (succeeded(parser.parseOptionalComma())) { StringRef mod; if (parser.parseKeyword(&mod)) return failure(); @@ -636,10 +636,13 @@ static ParseResult parseWsLoopOp(OpAsmParser &parser, OperationState &result) { schedule[0] = llvm::toUpper(schedule[0]); auto attr = parser.getBuilder().getStringAttr(schedule); result.addAttribute("schedule_val", attr); - if (modifiers.size() > 0) - { - auto mod = parser.getBuilder().getStringAttr(modifiers[0]); - result.addAttribute("schedule_modifiers", mod); + if (modifiers.size() > 0) { + auto mod = parser.getBuilder().getStringAttr(modifiers[0]); + result.addAttribute("schedule_modifiers", mod); + if (modifiers.size() > 1) { + mod = parser.getBuilder().getStringAttr(modifiers[1]); + result.addAttribute("simd_modifier", mod); + } } if (scheduleChunkSize) { auto chunkSizeType = parser.getBuilder().getI32Type(); @@ -702,6 +705,10 @@ static void printWsLoopOp(OpAsmPrinter &p, WsLoopOp op) { if (auto modifier = op.schedule_modifiers()) { p << ", " << modifier; } + auto simd = op.simd_modifier(); + if (simd.hasValue() && *simd != "none") { + p << ", " << simd; + } p << ")"; } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 09d1b4387c654..cf864cb046101 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -275,6 +275,13 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::InsertPointTy afterIP; llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + bool isSimd = false; + if (auto simd = loop.simd_modifier()) { + omp::ScheduleModifier modifier = *omp::symbolizeScheduleModifier(*simd); + isSimd = (modifier == omp::ScheduleModifier::simd); + } + if (schedule == 
omp::ClauseScheduleKind::Static) { loopInfo = ompBuilder->createStaticWorkshareLoop(ompLoc, loopInfo, allocaIP, !loop.nowait(), chunk); @@ -286,13 +293,19 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, schedType = llvm::omp::OMPScheduleType::DynamicChunked; break; case omp::ClauseScheduleKind::Guided: - schedType = llvm::omp::OMPScheduleType::GuidedChunked; + if (isSimd) + schedType = llvm::omp::OMPScheduleType::GuidedSimd; + else + schedType = llvm::omp::OMPScheduleType::GuidedChunked; break; case omp::ClauseScheduleKind::Auto: schedType = llvm::omp::OMPScheduleType::Auto; break; case omp::ClauseScheduleKind::Runtime: - schedType = llvm::omp::OMPScheduleType::Runtime; + if (isSimd) + schedType = llvm::omp::OMPScheduleType::RuntimeSimd; + else + schedType = llvm::omp::OMPScheduleType::Runtime; break; default: llvm_unreachable("Unknown schedule value");