Fixed debug info for ITT calls. #3829

Merged · 2 commits · Jun 2, 2021
46 changes: 32 additions & 14 deletions llvm/lib/Transforms/Instrumentation/SPIRITTAnnotations.cpp
@@ -157,15 +157,17 @@ Instruction *emitCall(Module &M, Type *RetTy, StringRef FunctionName,
// Insert instrumentation annotation calls that have no arguments (for example,
// work item start/finish/resume and barrier annotations).
void insertSimpleInstrumentationCall(Module &M, StringRef Name,
Instruction *Position) {
Instruction *Position,
const DebugLoc &DL) {
Type *VoidTy = Type::getVoidTy(M.getContext());
ArrayRef<Value *> Args;
Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position);
assert(InstrumentationCall && "Instrumentation call creation failed");
InstrumentationCall->setDebugLoc(DL);
}
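
The point of the new `DebugLoc` parameter: LLVM's verifier rejects an inlinable call that lacks a `!dbg` location inside a function that carries debug info, so each synthesized annotation call now inherits a location from a neighboring instruction. A minimal sketch of the same idea using `IRBuilder` (the helper name is hypothetical; the pass's actual `emitCall` differs):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Create a void annotation call before Position and stamp it with the
// caller-provided debug location.
static CallInst *emitAnnotationCall(Module &M, StringRef Name,
                                    Instruction *Position,
                                    const DebugLoc &DL) {
  IRBuilder<> Builder(Position);       // inserts before Position
  Builder.SetCurrentDebugLocation(DL); // applied to the created call
  FunctionCallee Callee =
      M.getOrInsertFunction(Name, Type::getVoidTy(M.getContext()));
  return Builder.CreateCall(Callee);
}
```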

// Insert instrumentation annotation calls for SPIR-V atomics.
void insertAtomicInstrumentationCall(Module &M, StringRef Name,
bool insertAtomicInstrumentationCall(Module &M, StringRef Name,
Contributor: I suggest leaving the function's return type as void (it was changed in #3691).

Contributor (author): I do not think that would be correct. We have to set IRModified to true whenever we modify the IR, so that all analyses can be invalidated. Since this function may exit without modifying the IR, it makes sense to let it return true or false.
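
For context, that boolean feeds the usual new-pass-manager contract at the end of `run()`; roughly the sketch below (paraphrased, not the verbatim source):

```cpp
PreservedAnalyses SPIRITTAnnotationsPass::run(Module &M,
                                              ModuleAnalysisManager &MAM) {
  bool IRModified = false;
  // ... every insertion helper ORs its result into IRModified ...
  return IRModified ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
```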

CallInst *AtomicFun, Instruction *Position,
StringRef AtomicName) {
LLVMContext &Ctx = M.getContext();
@@ -208,7 +210,7 @@ void insertAtomicInstrumentationCall(Module &M, StringRef Name,
auto *MemFlag = dyn_cast<ConstantInt>(AtomicFun->getArgOperand(2));
// TODO: add non-constant memory order processing
if (!MemFlag)
return;
return false;
uint64_t IntMemFlag = MemFlag->getValue().getZExtValue();
uint64_t Order;
if (IntMemFlag & 0x2)
@@ -219,10 +221,15 @@ void insertAtomicInstrumentationCall(Module &M, StringRef Name,
Order = 3;
else
Order = 0;
PointerType *Int8PtrAS4Ty = PointerType::get(IntegerType::get(Ctx, 8), 4);
Ptr = CastInst::CreatePointerBitCastOrAddrSpaceCast(Ptr, Int8PtrAS4Ty, "",
Position);
Value *MemOrder = ConstantInt::get(Int32Ty, Order);
Value *Args[] = {Ptr, AtomicOp, MemOrder};
Instruction *InstrumentationCall = emitCall(M, VoidTy, Name, Args, Position);
assert(InstrumentationCall && "Instrumentation call creation failed");
InstrumentationCall->setDebugLoc(AtomicFun->getDebugLoc());
return true;
}
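
The diff view collapses part of the flag-to-order mapping above; pieced together, it amounts to the helper below (a sketch; the bit values are assumed to follow the SPIR-V MemorySemantics mask: 0x2 Acquire, 0x4 Release, 0x8 AcquireRelease):

```cpp
#include <cstdint>

// Map SPIR-V memory-semantics bits to the ITT memory-order argument.
static uint64_t ittMemoryOrder(uint64_t IntMemFlag) {
  if (IntMemFlag & 0x2)
    return 1; // acquire
  if (IntMemFlag & 0x4)
    return 2; // release
  if (IntMemFlag & 0x8)
    return 3; // acquire-release
  return 0;   // relaxed
}
```

Note also the new `CreatePointerBitCastOrAddrSpaceCast` lines: they normalize the atomic's pointer operand to `i8 addrspace(4)*`, so a single `__itt_offload_atomic_op_*` declaration covers every pointee type and address space, which is what the updated CHECK lines in the tests below expect.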

} // namespace
@@ -245,15 +252,24 @@ PreservedAnalyses SPIRITTAnnotationsPass::run(Module &M,

// At the beginning of a kernel, insert a work item start annotation
// instruction.
if (IsSPIRKernel)
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_START,
&*inst_begin(F));
if (IsSPIRKernel) {
Instruction *InsertPt = &*inst_begin(F);
if (InsertPt->isDebugOrPseudoInst())
InsertPt = InsertPt->getNextNonDebugInstruction();
assert(InsertPt && "Function does not have any real instructions.");
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_START, InsertPt,
InsertPt->getDebugLoc());
IRModified = true;
}

for (BasicBlock &BB : F) {
// Insert Finish instruction before return instruction
if (IsSPIRKernel)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI);
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_FINISH, RI,
RI->getDebugLoc());
IRModified = true;
}
for (Instruction &I : BB) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
@@ -275,15 +291,17 @@
return CalleeName.startswith(Name);
})) {
Instruction *InstAfterBarrier = CI->getNextNode();
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WG_BARRIER, CI);
const DebugLoc &DL = CI->getDebugLoc();
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WG_BARRIER, CI, DL);
insertSimpleInstrumentationCall(M, ITT_ANNOTATION_WI_RESUME,
InstAfterBarrier);
InstAfterBarrier, DL);
IRModified = true;
} else if (CalleeName.startswith(SPIRV_ATOMIC_INST)) {
Instruction *InstAfterAtomic = CI->getNextNode();
insertAtomicInstrumentationCall(M, ITT_ANNOTATION_ATOMIC_START, CI,
CI, CalleeName);
insertAtomicInstrumentationCall(M, ITT_ANNOTATION_ATOMIC_FINISH, CI,
InstAfterAtomic, CalleeName);
IRModified |= insertAtomicInstrumentationCall(
M, ITT_ANNOTATION_ATOMIC_START, CI, CI, CalleeName);
IRModified |= insertAtomicInstrumentationCall(
M, ITT_ANNOTATION_ATOMIC_FINISH, CI, InstAfterAtomic, CalleeName);
}
}
}
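
Pulled out of the hunk above, the new kernel-entry logic reads as a standalone helper (a consolidating sketch; `pickEntryInsertPt` is a hypothetical name used only here):

```cpp
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include <cassert>

using namespace llvm;

// Skip a leading debug/pseudo instruction so the wi_start annotation is not
// placed above it; the chosen instruction also donates its debug location.
static Instruction *pickEntryInsertPt(Function &F) {
  Instruction *I = &*inst_begin(F);
  if (I->isDebugOrPseudoInst())
    I = I->getNextNonDebugInstruction();
  assert(I && "Function does not have any real instructions.");
  return I;
}
```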
22 changes: 14 additions & 8 deletions llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_load.ll
@@ -46,9 +46,11 @@ if.end.i: ; preds = %entry
%9 = addrspacecast i64* %8 to i64 addrspace(4)*
%10 = load i64, i64 addrspace(4)* %9, align 8
%add.ptr.i34 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %10
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 0, i32 0)
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 0, i32 0)
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_1]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
%call3.i.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i34, i32 1, i32 896) #2
call spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %add.ptr.i34)
%ptridx.i.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %4
@@ -66,9 +68,11 @@ define weak_odr dso_local spir_func void @__synthetic_spir_fun_call(i32 addrspac
entry:
; CHECK-LABEL: spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %{{.*}}) {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]], i32 0, i32 0)
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_S]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_S]], i32 0, i32 0)
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_S]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %ptr, i32 1, i32 896) #2
; CHECK-NOT: call void @__itt_offload_wi_finish_wrapper()
ret void
@@ -93,9 +97,11 @@ entry:
%add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %5
%6 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !19
%7 = extractelement <3 x i64> %6, i64 0
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 0, i32 0)
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
; CHECK-NEXT: {{.*}}__spirv_AtomicLoad{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 896)
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 0, i32 0)
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_2]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 0, i32 0)
%call3.i.i.i = tail call spir_func i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(i32 addrspace(1)* %add.ptr.i32, i32 1, i32 896) #2
%ptridx.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 %7
%ptridx.ascast.i.i = addrspacecast i32 addrspace(1)* %ptridx.i.i to i32 addrspace(4)*
@@ -106,8 +112,8 @@
}

; CHECK: declare void @__itt_offload_wi_start_wrapper()
; CHECK: declare void @__itt_offload_atomic_op_start(i32 addrspace(1)*, i32, i32)
; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32)
; CHECK: declare void @__itt_offload_atomic_op_start(i8 addrspace(4)*, i32, i32)
; CHECK: declare void @__itt_offload_atomic_op_finish(i8 addrspace(4)*, i32, i32)
; CHECK: declare void @__itt_offload_wi_finish_wrapper()

attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/AtomicRef/load.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
22 changes: 14 additions & 8 deletions llvm/test/Transforms/SPIRITTAnnotations/itt_atomic_store.ll
@@ -43,9 +43,11 @@ if.end.i: ; preds = %entry
%7 = load i64, i64 addrspace(4)* %6, align 8
%add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_1, i64 %7
%conv.i.i = trunc i64 %4 to i32
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]], i32 1, i32 0
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_1:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0
; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_1]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_1]], i32 1, i32 0
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_1]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0
tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i.i) #2
tail call spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %add.ptr.i)
br label %_ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIiEvNS0_5queueEmENKUlRS1_E_clES7_EUlNS0_4itemILi1ELb1EEEE_Li1EEEvNS0_5rangeIXT1_EEET0_ENKUlSA_E_clESA_.exit
@@ -59,9 +61,11 @@ _ZZN2cl4sycl7handler24parallel_for_lambda_implI12store_kernelIiEZZ10store_testIi
define weak_odr dso_local spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %ptr) {
entry:
; CHECK-LABEL: spir_func void @__synthetic_spir_fun_call(i32 addrspace(1)* %{{.*}}) {
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]], i32 1, i32 0)
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_S:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0)
; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_S]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_S]], i32 1, i32 0)
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_S]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0)
%0 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15
%1 = extractelement <3 x i64> %0, i64 0
%conv = trunc i64 %1 to i32
@@ -86,18 +90,20 @@ entry:
%3 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*), align 32, !noalias !15
%4 = extractelement <3 x i64> %3, i64 0
%conv.i = trunc i64 %4 to i32
; CHECK: call void @__itt_offload_atomic_op_start(i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]], i32 1, i32 0)
; CHECK: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_2:[0-9a-zA-Z._]+]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_start(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0)
; CHECK-NEXT: {{.*}}__spirv_AtomicStore{{.*}}(i32 addrspace(1)* %[[ATOMIC_ARG_2]],{{.*}}, i32 896
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i32 addrspace(1)* %[[ATOMIC_ARG_2]], i32 1, i32 0)
; CHECK-NEXT: [[ARG_ASCAST:%[0-9a-zA-Z._]+]] = addrspacecast i32 addrspace(1)* %[[ATOMIC_ARG_2]] to i8 addrspace(4)*
; CHECK-NEXT: call void @__itt_offload_atomic_op_finish(i8 addrspace(4)* [[ARG_ASCAST]], i32 1, i32 0)
tail call spir_func void @_Z19__spirv_AtomicStorePU3AS1iN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagEi(i32 addrspace(1)* %add.ptr.i, i32 1, i32 896, i32 %conv.i) #2
; CHECK: call void @__itt_offload_wi_finish_wrapper()
; CHECK-NEXT: ret void
ret void
}

; CHECK: declare void @__itt_offload_wi_start_wrapper()
; CHECK: declare void @__itt_offload_atomic_op_start(i32 addrspace(1)*, i32, i32)
; CHECK: declare void @__itt_offload_atomic_op_finish(i32 addrspace(1)*, i32, i32)
; CHECK: declare void @__itt_offload_atomic_op_start(i8 addrspace(4)*, i32, i32)
; CHECK: declare void @__itt_offload_atomic_op_finish(i8 addrspace(4)*, i32, i32)
; CHECK: declare void @__itt_offload_wi_finish_wrapper()

attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="llvm-test-suite/SYCL/AtomicRef/store.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }