From 9d21fb8f37dbeb36015546907e94f2281d95b7c9 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 22 Aug 2024 14:20:38 +0000 Subject: [PATCH 1/2] [mlir][LLVMIR][OpenMP] Move reduction allocas before stores Delay implicit reduction var stores until after all allocas. This fixes a couple of cases in existing unit tests. --- .../Lower/OpenMP/parallel-reduction-mixed.f90 | 4 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 53 +++++++++++-------- .../openmp-reduction-array-sections.mlir | 2 +- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 index 1457be05ca102..262075ec9b25d 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 @@ -26,10 +26,10 @@ end subroutine proc !CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]] !CHECK: %[[F_priv:.*]] = alloca ptr !CHECK: %[[I_priv:.*]] = alloca i32 -!CHECK: store ptr %{{.*}}, ptr %[[F_priv]] !CHECK: omp.reduction.init: -!CHECK: store i32 0, ptr %[[I_priv]] +!CHECK: store ptr %{{.*}}, ptr %[[F_priv]] +!CHECK: store i32 0, ptr %[[I_priv]] !CHECK: omp.par.region: !CHECK: br label %[[MALLOC_BB:.*]] diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6d14d77c440e6..b64bd7d45a770 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -593,22 +593,23 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, } /// Allocate space for privatized reduction variables. +/// `deferredStores` contains information to create store operations which needs +/// to be inserted after all allocas template -static LogicalResult -allocReductionVars(T loop, ArrayRef reductionArgs, - llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, - SmallVectorImpl &reductionDecls, - SmallVectorImpl &privateReductionVariables, - DenseMap &reductionVariableMap, - llvm::ArrayRef isByRefs) { +static LogicalResult allocReductionVars( + T loop, ArrayRef reductionArgs, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVectorImpl &reductionDecls, + SmallVectorImpl &privateReductionVariables, + DenseMap &reductionVariableMap, + SmallVectorImpl> &deferredStores, + llvm::ArrayRef isByRefs) { llvm::IRBuilderBase::InsertPointGuard guard(builder); builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); // delay creating stores until after all allocas - SmallVector> storesToCreate; - storesToCreate.reserve(loop.getNumReductionVars()); + deferredStores.reserve(loop.getNumReductionVars()); for (std::size_t i = 0; i < loop.getNumReductionVars(); ++i) { Region &allocRegion = reductionDecls[i].getAllocRegion(); @@ -628,7 +629,7 @@ allocReductionVars(T loop, ArrayRef reductionArgs, // variable allocated in the inlined region) llvm::Value *var = builder.CreateAlloca( moduleTranslation.convertType(reductionDecls[i].getType())); - storesToCreate.emplace_back(phis[0], var); + deferredStores.emplace_back(phis[0], var); privateReductionVariables[i] = var; moduleTranslation.mapValue(reductionArgs[i], phis[0]); @@ -644,10 +645,6 @@ allocReductionVars(T loop, ArrayRef reductionArgs, } } - // TODO: further delay this so it doesn't come in the entry block at all - for (auto [data, addr] : storesToCreate) - builder.CreateStore(data, addr); - return success(); } @@ -819,12 +816,19 @@ static LogicalResult allocAndInitializeReductionVars( if (op.getNumReductionVars() == 0) return success(); + SmallVector> deferredStores; + if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation, allocaIP, reductionDecls, privateReductionVariables, reductionVariableMap, - isByRef))) + deferredStores, isByRef))) return failure(); + // store result of the alloc region to the allocated pointer to the real + // reduction variable + for (auto [data, addr] : deferredStores) + builder.CreateStore(data, addr); + // Before the loop, store the initial values of reductions into reduction // variables. Although this could be done after allocas, we don't want to mess // up with the alloca insertion point. @@ -1359,6 +1363,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, collectReductionDecls(opInst, reductionDecls); SmallVector privateReductionVariables( opInst.getNumReductionVars()); + SmallVector> deferredStores; auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // Allocate reduction vars @@ -1373,10 +1378,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, InsertPointTy(allocaIP.getBlock(), allocaIP.getBlock()->getTerminator()->getIterator()); - if (failed(allocReductionVars(opInst, reductionArgs, builder, - moduleTranslation, allocaIP, reductionDecls, - privateReductionVariables, - reductionVariableMap, isByRef))) + if (failed(allocReductionVars( + opInst, reductionArgs, builder, moduleTranslation, allocaIP, + reductionDecls, privateReductionVariables, reductionVariableMap, + deferredStores, isByRef))) bodyGenStatus = failure(); // Initialize reduction vars @@ -1401,6 +1406,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, builder.SetInsertPoint(initBlock->getFirstNonPHIOrDbgOrAlloca()); + // insert stores deferred until after all allocas + // these store the results of the alloc region into the allocation for the + // pointer to the reduction variable + for (auto [data, addr] : deferredStores) + builder.CreateStore(data, addr); + for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { SmallVector phis; diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir index da6f943004612..2d8a13ccd2a1f 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir @@ -89,7 +89,6 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: %[[VAL_13:.*]] = load i32, ptr %[[VAL_10]], align 4 // CHECK: %[[VAL_20:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 // CHECK: %[[VAL_21:.*]] = alloca ptr, align 8 -// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8 // CHECK: %[[VAL_14:.*]] = alloca [1 x ptr], align 8 // CHECK: br label %[[VAL_15:.*]] // CHECK: omp.reduction.init: ; preds = %[[VAL_16:.*]] @@ -98,6 +97,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) attribute // CHECK: br label %[[VAL_18:.*]] // CHECK: omp.par.region1: ; preds = %[[VAL_17]] // CHECK: %[[VAL_19:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 +// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8 // CHECK: br label %[[VAL_22:.*]] // CHECK: omp_section_loop.preheader: ; preds = %[[VAL_18]] // CHECK: store i32 0, ptr %[[VAL_7]], align 4 From 52c59f27c8e24835088c8b9937a2b81713fa1833 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 27 Aug 2024 10:16:01 +0000 Subject: [PATCH 2/2] Define a structure holding defered store args --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index b64bd7d45a770..e1c3dbe60de4b 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -592,19 +592,31 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, return bodyGenStatus; } +namespace { +/// Contains the arguments for an LLVM store operation +struct DeferredStore { + DeferredStore(llvm::Value *value, llvm::Value *address) + : value(value), address(address) {} + + llvm::Value *value; + llvm::Value *address; +}; +} // namespace + /// Allocate space for privatized reduction variables. /// `deferredStores` contains information to create store operations which needs /// to be inserted after all allocas template -static LogicalResult allocReductionVars( - T loop, ArrayRef reductionArgs, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, - SmallVectorImpl &reductionDecls, - SmallVectorImpl &privateReductionVariables, - DenseMap &reductionVariableMap, - SmallVectorImpl> &deferredStores, - llvm::ArrayRef isByRefs) { +static LogicalResult +allocReductionVars(T loop, ArrayRef reductionArgs, + llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVectorImpl &reductionDecls, + SmallVectorImpl &privateReductionVariables, + DenseMap &reductionVariableMap, + SmallVectorImpl &deferredStores, + llvm::ArrayRef isByRefs) { llvm::IRBuilderBase::InsertPointGuard guard(builder); builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); @@ -816,7 +828,7 @@ static LogicalResult allocAndInitializeReductionVars( if (op.getNumReductionVars() == 0) return success(); - SmallVector> deferredStores; + SmallVector deferredStores; if (failed(allocReductionVars(op, reductionArgs, builder, moduleTranslation, allocaIP, reductionDecls, @@ -1363,7 +1375,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, collectReductionDecls(opInst, reductionDecls); SmallVector privateReductionVariables( opInst.getNumReductionVars()); - SmallVector> deferredStores; + SmallVector deferredStores; auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // Allocate reduction vars