Skip to content

[OMPIRBuilder] Don't use invalid debug loc in reduction functions. #147950

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2617,7 +2617,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
AttributeList FuncAttrs) {
InsertPointTy SavedIP = Builder.saveIP();
IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
Expand All @@ -2630,6 +2630,7 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
WcFunc->addParamAttr(1, Attribute::NoUndef);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", WcFunc);
Builder.SetInsertPoint(EntryBB);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());

// ReduceList: thread local Reduce list.
// At the stage of the computation when this function is called, partially
Expand Down Expand Up @@ -2844,7 +2845,6 @@ Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
}

Builder.CreateRetVoid();
Builder.restoreIP(SavedIP);

return WcFunc;
}
Expand All @@ -2853,6 +2853,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
AttributeList FuncAttrs) {
LLVMContext &Ctx = M.getContext();
IRBuilder<>::InsertPointGuard IPG(Builder);
FunctionType *FuncTy =
FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getInt16Ty(),
Expand All @@ -2871,6 +2872,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
SarFunc->addParamAttr(3, Attribute::SExt);
BasicBlock *EntryBB = BasicBlock::Create(M.getContext(), "entry", SarFunc);
Builder.SetInsertPoint(EntryBB);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());

// Thread local Reduce list used to host the values of data to be reduced.
Argument *ReduceListArg = SarFunc->getArg(0);
Expand Down Expand Up @@ -3017,7 +3019,7 @@ Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
Expand All @@ -3033,6 +3035,7 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(

BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());

// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
Expand Down Expand Up @@ -3120,14 +3123,13 @@ Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
}

Builder.CreateRetVoid();
Builder.restoreIP(OldIP);
return LtGCFunc;
}

Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
Expand All @@ -3143,6 +3145,7 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(

BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());

// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
Expand Down Expand Up @@ -3203,14 +3206,13 @@ Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
Builder.restoreIP(OldIP);
return LtGRFunc;
}

Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
AttributeList FuncAttrs) {
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
FunctionType *FuncTy = FunctionType::get(
Builder.getVoidTy(),
Expand All @@ -3226,6 +3228,7 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(

BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGCFunc);
Builder.SetInsertPoint(EntryBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());

// Buffer: global reduction buffer.
Argument *BufferArg = LtGCFunc->getArg(0);
Expand Down Expand Up @@ -3311,14 +3314,13 @@ Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
}

Builder.CreateRetVoid();
Builder.restoreIP(OldIP);
return LtGCFunc;
}

Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
Type *ReductionsBufferTy, AttributeList FuncAttrs) {
OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
IRBuilder<>::InsertPointGuard IPG(Builder);
LLVMContext &Ctx = M.getContext();
auto *FuncTy = FunctionType::get(
Builder.getVoidTy(),
Expand All @@ -3334,6 +3336,7 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(

BasicBlock *EntryBlock = BasicBlock::Create(Ctx, "entry", LtGRFunc);
Builder.SetInsertPoint(EntryBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());

// Buffer: global reduction buffer.
Argument *BufferArg = LtGRFunc->getArg(0);
Expand Down Expand Up @@ -3394,7 +3397,6 @@ Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
->addFnAttr(Attribute::NoUnwind);
Builder.CreateRetVoid();
Builder.restoreIP(OldIP);
return LtGRFunc;
}

Expand All @@ -3407,6 +3409,7 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
IRBuilder<>::InsertPointGuard IPG(Builder);
auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
{Builder.getPtrTy(), Builder.getPtrTy()},
/* IsVarArg */ false);
Expand All @@ -3419,6 +3422,7 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
BasicBlock *EntryBB =
BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
Builder.SetInsertPoint(EntryBB);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());

// Need to alloca memory here and deal with the pointers before getting
// LHS/RHS pointers out
Expand Down Expand Up @@ -3746,10 +3750,12 @@ static Error populateReductionFunction(
Function *ReductionFunc,
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
IRBuilder<> &Builder, ArrayRef<bool> IsByRef, bool IsGPU) {
IRBuilder<>::InsertPointGuard IPG(Builder);
Module *Module = ReductionFunc->getParent();
BasicBlock *ReductionFuncBlock =
BasicBlock::Create(Module->getContext(), "", ReductionFunc);
Builder.SetInsertPoint(ReductionFuncBlock);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
Value *LHSArrayPtr = nullptr;
Value *RHSArrayPtr = nullptr;
if (IsGPU) {
Expand Down
121 changes: 121 additions & 0 deletions mlir/test/Target/LLVMIR/omptarget-debug-reduc-fn-loc.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
omp.private {type = private} @_QFEi_private_i32 : i32 loc(#loc1)
omp.declare_reduction @add_reduction_i32 : i32 init {
^bb0(%arg0: i32 loc("test.f90":8:7)):
%0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc2)
omp.yield(%0 : i32) loc(#loc2)
} combiner {
^bb0(%arg0: i32 loc("test.f90":8:7), %arg1: i32 loc("test.f90":8:7)):
%0 = llvm.add %arg0, %arg1 : i32 loc(#loc2)
omp.yield(%0 : i32) loc(#loc2)
} loc(#loc2)
llvm.func @_QQmain() {
%0 = llvm.mlir.constant(1 : i64) : i64 loc(#loc4)
%1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr<5> loc(#loc4)
%2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr loc(#loc4)
%3 = llvm.mlir.constant(1 : i64) : i64 loc(#loc1)
%4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr<5> loc(#loc1)
%5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr loc(#loc1)
%6 = llvm.mlir.constant(8191 : index) : i64 loc(#loc5)
%7 = llvm.mlir.constant(0 : index) : i64 loc(#loc5)
%8 = llvm.mlir.constant(1 : index) : i64 loc(#loc5)
%9 = llvm.mlir.constant(0 : i32) : i32 loc(#loc5)
%10 = llvm.mlir.constant(8192 : index) : i64 loc(#loc5)
%11 = llvm.mlir.addressof @_QFEarr : !llvm.ptr<1> loc(#loc6)
%12 = llvm.addrspacecast %11 : !llvm.ptr<1> to !llvm.ptr loc(#loc6)
llvm.store %9, %2 : i32, !llvm.ptr loc(#loc7)
%15 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} loc(#loc4)
%16 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc7)
%17 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64) loc(#loc7)
%18 = omp.map.info var_ptr(%12 : !llvm.ptr, !llvm.array<8192 x i32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%17) -> !llvm.ptr {name = "arr"} loc(#loc7)
omp.target map_entries(%15 -> %arg0, %16 -> %arg1, %18 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
%19 = llvm.mlir.constant(8192 : i32) : i32 loc(#loc5)
%20 = llvm.mlir.constant(1 : i32) : i32 loc(#loc5)
%21 = llvm.mlir.constant(8192 : index) : i64 loc(#loc6)
omp.teams reduction(@add_reduction_i32 %arg0 -> %arg3 : !llvm.ptr) {
omp.parallel private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) {
omp.distribute {
omp.wsloop reduction(@add_reduction_i32 %arg3 -> %arg5 : !llvm.ptr) {
omp.loop_nest (%arg6) : i32 = (%20) to (%19) inclusive step (%20) {
llvm.store %arg6, %arg4 : i32, !llvm.ptr loc(#loc2)
%22 = llvm.load %arg5 : !llvm.ptr -> i32 loc(#loc8)
%23 = llvm.load %arg4 : !llvm.ptr -> i32 loc(#loc8)
%34 = llvm.add %22, %23 : i32 loc(#loc8)
llvm.store %34, %arg5 : i32, !llvm.ptr loc(#loc8)
omp.yield loc(#loc2)
} loc(#loc2)
} {omp.composite} loc(#loc2)
} {omp.composite} loc(#loc2)
omp.terminator loc(#loc2)
} {omp.composite} loc(#loc2)
omp.terminator loc(#loc2)
} loc(#loc2)
omp.terminator loc(#loc2)
} loc(#loc13)
llvm.return loc(#loc9)
} loc(#loc12)
llvm.mlir.global internal @_QFEarr() {addr_space = 1 : i32} : !llvm.array<8192 x i32> {
%0 = llvm.mlir.zero : !llvm.array<8192 x i32> loc(#loc6)
llvm.return %0 : !llvm.array<8192 x i32> loc(#loc6)
} loc(#loc6)
} loc(#loc)

#loc = loc("test.f90":4:18)
#loc1 = loc("test.f90":4:18)
#loc2 = loc("test.f90":8:7)
#loc3 = loc("test.f90":1:7)
#loc4 = loc("test.f90":3:18)
#loc5 = loc(unknown)
#loc6 = loc("test.f90":5:18)
#loc7 = loc("test.f90":6:7)
#loc8 = loc("test.f90":10:7)
#loc9 = loc("test.f90":16:7)

#di_file = #llvm.di_file<"target7.f90" in "">
#di_null_type = #llvm.di_null_type
#di_compile_unit = #llvm.di_compile_unit<id = distinct[0]<>,
sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang",
isOptimized = false, emissionKind = LineTablesOnly>
#di_subroutine_type = #llvm.di_subroutine_type<
callingConvention = DW_CC_program, types = #di_null_type>
#di_subprogram = #llvm.di_subprogram<id = distinct[1]<>,
compileUnit = #di_compile_unit, scope = #di_file, name = "main",
file = #di_file, subprogramFlags = "Definition|MainSubprogram",
type = #di_subroutine_type>
#di_subprogram1 = #llvm.di_subprogram<compileUnit = #di_compile_unit,
name = "target", file = #di_file, subprogramFlags = "Definition",
type = #di_subroutine_type>


#loc12 = loc(fused<#di_subprogram>[#loc3])
#loc13 = loc(fused<#di_subprogram1>[#loc2])

// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @"__omp_offloading_{{.*}}__QQmain_l8_omp$reduction$reduction_func.1"
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_shuffle_and_reduce_func.2
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_inter_warp_copy_func.3
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_list_to_global_copy_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_list_to_global_reduce_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_global_to_list_copy_func
// CHECK-NOT: !dbg
// CHECK: }
// CHECK-DAG: define internal void @_omp_reduction_global_to_list_reduce_func
// CHECK-NOT: !dbg
// CHECK: }
Loading