-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[OMPIRBuilder] Don't use invalid debug loc in reduction functions. #147950
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
We have this pattern of code in OMPIRBuilder for many functions that are used in reduction operations. Function *LtGRFunc = Function::Create BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc); Builder.SetInsertPoint(EntryBlock); The insertion point is moved to the new function but the debug location is not updated. This means that reduction function will use the debug location that points to another function. This problem gets hidden because these functions gets inlined but the potential for failure exists. This patch resets the debug location when insertion point is moved to new function. Some InsertPointGuard have been added to make sure why restore the debug location correctly when we are done with the reduction function.
@llvm/pr-subscribers-mlir-llvm @llvm/pr-subscribers-flang-openmp Author: Abid Qadeer (abidh) ChangesWe have this pattern of code in OMPIRBuilder for many functions that are used in reduction operations.
The insertion point is moved to the new function but the debug location is not updated. This means that reduction function will use the debug location that points to another function. This problem gets hidden because these functions gets inlined but the potential for failure exists. This patch resets the debug location when insertion point is moved to new function. Some Full diff: https://github.com/llvm/llvm-project/pull/147950.diff 2 Files Affected:
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index db792a3b52d24..170224616ac64 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
AttributeList FuncAttrs) {
- InsertPointTy SavedIP = Builder.saveIP();
+ IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
@@ -2630,6 +2630,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
WcFunc->addParamAttr(1, Attribute::NoUndef);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc);
Builder.SetInsertPoint(EntryBB);
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// ReduceList: thread local Reduce list.
// At the stage of the computation when this function is called, partially
@@ -2844,7 +2845,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
}
Builder.CreateRetVoid();
- Builder.restoreIP(SavedIP);
return WcFunc;
}
@@ -2853,6 +2853,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
AttributeList FuncAttrs) {
LLVMContext &Ctx = M.getContext();
+ IRBuilder<>::InsertPointGuard IPG(Builder);
FunctionType *FuncTy =
FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getInt16Ty(),
@@ -2871,6 +2872,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
SarFunc->addParamAttr(3, Attribute::SExt);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc);
Builder.SetInsertPoint(EntryBB);
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Thread local Reduce list used to host the values of data to be reduced.
Argument *ReduceListArg = SarFunc->getArg(0);
@@ -3017,7 +3019,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
- OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
+ IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3033,6 +3035,7 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3120,14 +3123,13 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
}
Builder.CreateRetVoid();
- Builder.restoreIP(OldIP);
return LtGCFunc;
}
Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
- OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
+ IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3143,6 +3145,7 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3203,14 +3206,13 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
- Builder.restoreIP(OldIP);
return LtGRFunc;
}
Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
- OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
+ IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3226,6 +3228,7 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
@@ -3311,14 +3314,13 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
}
Builder.CreateRetVoid();
- Builder.restoreIP(OldIP);
return LtGCFunc;
}
Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
- OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
+ IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
auto *FuncTy = FunctionType::get(
Builder.getVoidTy(),
@@ -3334,6 +3336,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
@@ -3394,7 +3397,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
- Builder.restoreIP(OldIP);
return LtGRFunc;
}
@@ -3407,6 +3409,7 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getPtrTy()},
/* IsVarArg */ false);
@@ -3419,6 +3422,7 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
BasicBlock *EntryBB =
BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
Builder.SetInsertPoint(EntryBB);
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
// Need to alloca memory here and deal with the pointers before getting
// LHS/RHS pointers out
@@ -3746,10 +3750,12 @@ static Error populateReductionFunction(
Function *ReductionFunc,
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) {
+ IRBuilder<>::InsertPointGuard IPG(Builder);
Module *Module = ReductionFunc->getParent();
BasicBlock *ReductionFuncBlock =
BasicBlock::Create(Module->getContext(), "", ReductionFunc);
Builder.SetInsertPoint(ReductionFuncBlock);
+ Builder.SetCurrentDebugLocation(llvm::DebugLoc());
Value *LHSArrayPtr = nullptr;
Value *RHSArrayPtr = nullptr;
if (IsGPU) {
diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir
new file mode 100644
index 0000000000000..d889ef4f5700c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir
@@ -0,0 +1,121 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
+ omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
+ omp.declare_reduction @add_reduction_i32 : i32 init {
+ ^bb0(%arg0: i32 loc("test.f90":8:7)):
+ %0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc2)
+ omp.yield(%0 : i32) loc(#loc2)
+ } combiner {
+ ^bb0(%arg0: i32 loc("test.f90":8:7), %arg1: i32 loc("test.f90":8:7)):
+ %0 = llvm.add %arg0, %arg1 : i32 loc(#loc2)
+ omp.yield(%0 : i32) loc(#loc2)
+ } loc(#loc2)
+ llvm.func @_QQmain() {
+ %0 = llvm.mlir.constant(1 : i64) : i64 loc(#loc4)
+ %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr<5> loc(#loc4)
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
+ %3 = llvm.mlir.constant(1 : i64) : i64 loc(#loc1)
+ %4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc1)
+ %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc1)
+ %6 = llvm.mlir.constant(8191 : index) : i64 loc(#loc5)
+ %7 = llvm.mlir.constant(0 : index) : i64 loc(#loc5)
+ %8 = llvm.mlir.constant(1 : index) : i64 loc(#loc5)
+ %9 = llvm.mlir.constant(0 : i32) : i32 loc(#loc5)
+ %10 = llvm.mlir.constant(8192 : index) : i64 loc(#loc5)
+ %11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr<1> loc(#loc6)
+ %12 = llvm.addrspacecast %11 : !llvm.ptr<1> to !llvm.ptr loc(#loc6)
+ llvm.store %9, %2 : i32, !llvm.ptr loc(#loc7)
+ %15 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} loc(#loc4)
+ %16 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc7)
+ %17 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64) loc(#loc7)
+ %18 = omp.map.info var_ptr(%12 : !llvm.ptr, !llvm.array<8192 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%17) -> !llvm.ptr {name = "arr"} loc(#loc7)
+ omp.target map_entries(%15 -> %arg0, %16 -> %arg1, %18 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ %19 = llvm.mlir.constant(8192 : i32) : i32 loc(#loc5)
+ %20 = llvm.mlir.constant(1 : i32) : i32 loc(#loc5)
+ %21 = llvm.mlir.constant(8192 : index) : i64 loc(#loc6)
+ omp.teams reduction(@add_reduction_i32 %arg0 -> %arg3 : !llvm.ptr) {
+ omp.parallel private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
+ omp.distribute {
+ omp.wsloop reduction(@add_reduction_i32 %arg3 -> %arg5 : !llvm.ptr) {
+ omp.loop_nest (%arg6) : i32 = (%20) to (%19) inclusive step (%20) {
+ llvm.store %arg6, %arg4 : i32, !llvm.ptr loc(#loc2)
+ %22 = llvm.load %arg5 : !llvm.ptr -> i32 loc(#loc8)
+ %23 = llvm.load %arg4 : !llvm.ptr -> i32 loc(#loc8)
+ %34 = llvm.add %22, %23 : i32 loc(#loc8)
+ llvm.store %34, %arg5 : i32, !llvm.ptr loc(#loc8)
+ omp.yield loc(#loc2)
+ } loc(#loc2)
+ } {omp.composite} loc(#loc2)
+ } {omp.composite} loc(#loc2)
+ omp.terminator loc(#loc2)
+ } {omp.composite} loc(#loc2)
+ omp.terminator loc(#loc2)
+ } loc(#loc2)
+ omp.terminator loc(#loc2)
+ } loc(#loc13)
+ llvm.return loc(#loc9)
+ } loc(#loc12)
+ llvm.mlir.global internal @_QFEarr() {addr_space = 1 : i32} : !llvm.array<8192 x i32> {
+ %0 = llvm.mlir.zero : !llvm.array<8192 x i32> loc(#loc6)
+ llvm.return %0 : !llvm.array<8192 x i32> loc(#loc6)
+ } loc(#loc6)
+} loc(#loc)
+
+#loc = loc("test.f90":4:18)
+#loc1 = loc("test.f90":4:18)
+#loc2 = loc("test.f90":8:7)
+#loc3 = loc("test.f90":1:7)
+#loc4 = loc("test.f90":3:18)
+#loc5 = loc(unknown)
+#loc6 = loc("test.f90":5:18)
+#loc7 = loc("test.f90":6:7)
+#loc8 = loc("test.f90":10:7)
+#loc9 = loc("test.f90":16:7)
+
+#di_file = #llvm.di_file<"target7.f90" in "">
+#di_null_type = #llvm.di_null_type
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>,
+ sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang",
+ isOptimized = false, emissionKind = LineTablesOnly>
+#di_subroutine_type = #llvm.di_subroutine_type<
+ callingConvention = DW_CC_program, types = #di_null_type>
+#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>,
+ compileUnit = #di_compile_unit, scope = #di_file, name = "main",
+ file = #di_file, subprogramFlags = "Definition|MainSubprogram",
+ type = #di_subroutine_type>
+#di_subprogram1 = #llvm.di_subprogram<compileUnit = #di_compile_unit,
+ name = "target", file = #di_file, subprogramFlags = "Definition",
+ type = #di_subroutine_type>
+
+
+#loc12 = loc(fused<#di_subprogram>[#loc3])
+#loc13 = loc(fused<#di_subprogram1>[#loc2])
+
+// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @"__omp_offloading_{{.*}}__QQmain_l8_omp$reduction$reduction_func.1"
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func.2
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func.3
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_list_to_global_copy_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_list_to_global_reduce_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_global_to_list_copy_func
+// CHECK-NOT: !dbg
+// CHECK: }
+// CHECK-DAG: define internal void @_omp_reduction_global_to_list_reduce_func
+// CHECK-NOT: !dbg
+// CHECK: }
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, LGTM 👍🏽
This is similar to llvm#147950 but for declare mapper function.
This is similar to llvm#147950 but for declare mapper function.
This is similar to llvm#147950 but for task proxy function.
…unction. (#148284) This is similar to llvm/llvm-project#147950 but for task proxy function.
We have this pattern of code in OMPIRBuilder for many functions that are used in reduction operations.
The insertion point is moved to the new function but the debug location is not updated. This means that reduction function will use the debug location that points to another function. This problem gets hidden because these functions gets inlined but the potential for failure exists.
This patch resets the debug location when insertion point is moved to new function. Some
InsertPointGuard
have been added to make sure why restore the debug location correctly when we are done with the reduction function.