Skip to content

Commit f6569d1

Browse files
author
vzakhari
authored
[SPIRITTAnnotations] Fix debug info for ITT calls. (#3829)
Assign debug info for ITT calls based on the IR that they annotate. In addition, use proper argument type for atomic APIs. Signed-off-by: Vyacheslav Zakharin <[email protected]>
1 parent dd77f34 commit f6569d1

File tree

4 files changed

+165
-30
lines changed

4 files changed

+165
-30
lines changed

llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp

+32-14
Original file line numberDiff line numberDiff line change
@@ -157,15 +157,17 @@ Instruction *emitCall(Module &M, Type *RetTy, StringRef FunctionName,
157157
// Insert instrumentation annotation calls that have no arguments (for example,
158158
// work item start/finish/resume and barrier annotations).
159159
void insertSimpleInstrumentationCall(Module &M, StringRef Name,
160-
Instruction *Position) {
160+
Instruction *Position,
161+
const DebugLoc &DL) {
161162
Type *VoidTy = Type::getVoidTy(M.getContext());
162163
ArrayRef<Value *> Args;
163164
Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position);
164165
assert(InstrumentationCall && "Instrumentation call creation failed");
166+
InstrumentationCall->setDebugLoc(DL);
165167
}
166168

167169
// Insert instrumental annotation calls for SPIR-V atomics.
168-
void insertAtomicInstrumentationCall(Module &M, StringRef Name,
170+
bool insertAtomicInstrumentationCall(Module &M, StringRef Name,
169171
CallInst *AtomicFun, Instruction *Position,
170172
StringRef AtomicName) {
171173
LLVMContext &Ctx = M.getContext();
@@ -208,7 +210,7 @@ void insertAtomicInstrumentationCall(Module &M, StringRef Name,
208210
auto *MemFlag = dyn_cast<ConstantInt>(AtomicFun->getArgOperand(2));
209211
// TODO: add non-constant memory order processing
210212
if (!MemFlag)
211-
return;
213+
return false;
212214
uint64_t IntMemFlag = MemFlag->getValue().getZExtValue();
213215
uint64_t Order;
214216
if (IntMemFlag & 0x2)
@@ -219,10 +221,15 @@ void insertAtomicInstrumentationCall(Module &M, StringRef Name,
219221
Order = 3;
220222
else
221223
Order = 0;
224+
PointerType *Int8PtrAS4Ty = PointerType::get(IntegerType::get(Ctx, 8), 4);
225+
Ptr = CastInst::CreatePointerBitCastOrAddrSpaceCast(Ptr, Int8PtrAS4Ty, "",
226+
Position);
222227
Value *MemOrder = ConstantInt::get(Int32Ty, Order);
223228
Value *Args[] = {Ptr, AtomicOp, MemOrder};
224229
Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position);
225230
assert(InstrumentationCall && "Instrumentation call creation failed");
231+
InstrumentationCall->setDebugLoc(AtomicFun->getDebugLoc());
232+
return true;
226233
}
227234

228235
} // namespace
@@ -245,15 +252,24 @@ PreservedAnalyses SPIRITTAnnotationsPass::run(Module &M,
245252

246253
// At the beginning of a kernel insert work item start annotation
247254
// instruction.
248-
if (IsSPIRKernel)
249-
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_START,
250-
&*inst_begin(F));
255+
if (IsSPIRKernel) {
256+
Instruction *InsertPt = &*inst_begin(F);
257+
if (InsertPt->isDebugOrPseudoInst())
258+
InsertPt = InsertPt->getNextNonDebugInstruction();
259+
assert(InsertPt && "Function does not have any real instructions.");
260+
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_START, InsertPt,
261+
InsertPt->getDebugLoc());
262+
IRModified = true;
263+
}
251264

252265
for (BasicBlock &BB : F) {
253266
// Insert Finish instruction before return instruction
254267
if (IsSPIRKernel)
255-
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
256-
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI);
268+
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
269+
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI,
270+
RI->getDebugLoc());
271+
IRModified = true;
272+
}
257273
for (Instruction &I : BB) {
258274
CallInst *CI = dyn_cast<CallInst>(&I);
259275
if (!CI)
@@ -275,15 +291,17 @@ PreservedAnalyses SPIRITTAnnotationsPass::run(Module &M,
275291
return CalleeName.startswith(Name);
276292
})) {
277293
Instruction *InstAfterBarrier = CI->getNextNode();
278-
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WG_BARRIER, CI);
294+
const DebugLoc &DL = CI->getDebugLoc();
295+
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WG_BARRIER, CI, DL);
279296
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_RESUME,
280-
InstAfterBarrier);
297+
InstAfterBarrier, DL);
298+
IRModified = true;
281299
} else if (CalleeName.startswith(SPIRV_ATOMIC_INST)) {
282300
Instruction *InstAfterAtomic = CI->getNextNode();
283-
insertAtomicInstrumentationCall(M, ITT_ANNOTATION_ATOMIC_START, CI,
284-
CI, CalleeName);
285-
insertAtomicInstrumentationCall(M, ITT_ANNOTATION_ATOMIC_FINISH, CI,
286-
InstAfterAtomic, CalleeName);
301+
IRModified |= insertAtomicInstrumentationCall(
302+
M, ITT_ANNOTATION_ATOMIC_START, CI, CI, CalleeName);
303+
IRModified |= insertAtomicInstrumentationCall(
304+
M, ITT_ANNOTATION_ATOMIC_FINISH, CI, InstAfterAtomic, CalleeName);
287305
}
288306
}
289307
}

llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll

+14-8
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,11 @@ if.end.i: ; preds = %entry
4646
%9 = addrspacecast i64* %8 to i64 addrspace(4)*
4747
%10 = load i64, i64 addrspace(4)* %9, align 8
4848
%add.ptr.i34 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %10
49-
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 0, i32 0)
49+
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
50+
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
5051
; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896
51-
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 0)
52+
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_1]] to i8 addrspace(4)*
53+
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
5254
%call3.i.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i34, i32 1, i32 896) #2
5355
call spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %add.ptr.i34)
5456
%ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4
@@ -66,9 +68,11 @@ define weak_odr dso_local spir_func void @__synthetic_spir_fun_call(i32 addrspac
6668
entry:
6769
; CHECK-LABEL: spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %{{.*}}) {
6870
; CHECK-NEXT: entry:
69-
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]], i32 0, i32 0)
71+
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
72+
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
7073
; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_S]],{{.*}}, i32 896
71-
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_S]], i32 0, i32 0)
74+
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_S]] to i8 addrspace(4)*
75+
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
7276
call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %ptr, i32 1, i32 896) #2
7377
; CHECK-NOT: call void @__itt_offload_wi_finish_wrapper()
7478
ret void
@@ -93,9 +97,11 @@ entry:
9397
%add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %5
9498
%6 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !19
9599
%7 = extractelement <3 x i64> %6, i64 0
96-
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 0, i32 0)
100+
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
101+
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
97102
; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 896)
98-
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 0, i32 0)
103+
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_2]] to i8 addrspace(4)*
104+
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
99105
%call3.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i32, i32 1, i32 896) #2
100106
%ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %7
101107
%ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)*
@@ -106,8 +112,8 @@ entry:
106112
}
107113

108114
; CHECK: declare void @__itt_offload_wi_start_wrapper()
109-
; CHECK: declare void @__itt_offload_atomic_op_start(i32 addrspace(1)*, i32, i32)
110-
; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32)
115+
; CHECK: declare void @__itt_offload_atomic_op_start(i8 addrspace(4)*, i32, i32)
116+
; CHECK: declare void @__itt_offload_atomic_op_finish(i8 addrspace(4)*, i32, i32)
111117
; CHECK: declare void @__itt_offload_wi_finish_wrapper()
112118

113119
attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/AtomicRef/load.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }

llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll

+14-8
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,11 @@ if.end.i: ; preds = %entry
4343
%7 = load i64, i64 addrspace(4)* %6, align 8
4444
%add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %7
4545
%conv.i.i = trunc i64 %4 to i32
46-
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 1, i32 0
46+
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
47+
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0
4748
; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896
48-
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 0
49+
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_1]] to i8 addrspace(4)*
50+
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0
4951
tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i.i) #2
5052
tail call spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %add.ptr.i)
5153
br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit
@@ -59,9 +61,11 @@ _ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIi
5961
define weak_odr dso_local spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %ptr) {
6062
entry:
6163
; CHECK-LABEL: spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %{{.*}}) {
62-
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]], i32 1, i32 0)
64+
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
65+
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0)
6366
; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_S]],{{.*}}, i32 896
64-
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_S]], i32 1, i32 0)
67+
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_S]] to i8 addrspace(4)*
68+
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0)
6569
%0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15
6670
%1 = extractelement <3 x i64> %0, i64 0
6771
%conv = trunc i64 %1 to i32
@@ -86,18 +90,20 @@ entry:
8690
%3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15
8791
%4 = extractelement <3 x i64> %3, i64 0
8892
%conv.i = trunc i64 %4 to i32
89-
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 1, i32 0)
93+
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
94+
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0)
9095
; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 896
91-
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 1, i32 0)
96+
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_2]] to i8 addrspace(4)*
97+
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0)
9298
tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i) #2
9399
; CHECK: call void @__itt_offload_wi_finish_wrapper()
94100
; CHECK-NEXT: ret void
95101
ret void
96102
}
97103

98104
; CHECK: declare void @__itt_offload_wi_start_wrapper()
99-
; CHECK: declare void @__itt_offload_atomic_op_start(i32 addrspace(1)*, i32, i32)
100-
; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32)
105+
; CHECK: declare void @__itt_offload_atomic_op_start(i8 addrspace(4)*, i32, i32)
106+
; CHECK: declare void @__itt_offload_atomic_op_finish(i8 addrspace(4)*, i32, i32)
101107
; CHECK: declare void @__itt_offload_wi_finish_wrapper()
102108

103109
attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/AtomicRef/store.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }

0 commit comments

Comments
 (0)