-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[KeyInstr][LoopUnroll] Remap atoms while unrolling #133489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-debuginfo @llvm/pr-subscribers-llvm-transforms Author: Orlando Cazalet-Hyams (OCHyams) ChangesFull diff: https://github.com/llvm/llvm-project/pull/133489.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 45b49671dd3b6..1fa4eddc459c0 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -752,6 +752,14 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
+ // Remap source location atom instance. Do this now, rather than
+ // when we remap instructions, because remap is called once we've
+ // cloned all blocks (all the clones would get the same atom
+ // number).
+ if (!VMap.AtomMap.empty())
+ for (Instruction &I : *New)
+ RemapSourceAtom(&I, VMap);
+
// Update our running map of newest clones
LastValueMap[*BB] = New;
for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
@@ -802,7 +810,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
- // Remap all instructions in the most recent iteration
+ // Remap all instructions in the most recent iteration.
+ // Key Instructions: Nothing to do - we've already remapped the atoms.
remapInstructionsInBlocks(NewBlocks, LastValueMap);
for (BasicBlock *NewBlock : NewBlocks)
for (Instruction &I : *NewBlock)
diff --git a/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll-runtime.ll b/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll-runtime.ll
new file mode 100644
index 0000000000000..6deb04cc00f8d
--- /dev/null
+++ b/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll-runtime.ll
@@ -0,0 +1,69 @@
+
+; RUN: opt %s -S --passes=loop-unroll -unroll-runtime=true -unroll-count=4 -unroll-remainder -o - \
+; RUN: | FileCheck %s
+
+;; Check atoms are remapped for runtime unrolling.
+
+; CHECK: for.body.epil:
+; CHECK-NEXT: store i64 %indvars.iv.unr, ptr %p, align 4, !dbg [[G2R1:!.*]]
+
+; CHECK: for.body.epil.1:
+; CHECK-NEXT: store i64 %indvars.iv.next.epil, ptr %p, align 4, !dbg [[G3R1:!.*]]
+
+; CHECK: for.body.epil.2:
+; CHECK-NEXT: store i64 %indvars.iv.next.epil.1, ptr %p, align 4, !dbg [[G4R1:!.*]]
+
+; CHECK: for.body:
+; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %for.body.lr.ph.new ], [ %indvars.iv.next.3, %for.body ]
+; CHECK-NEXT: %niter = phi i64 [ 0, %for.body.lr.ph.new ], [ %niter.next.3, %for.body ]
+; CHECK-NEXT: store i64 %indvars.iv, ptr %p, align 4, !dbg [[G1R1:!.*]]
+; CHECK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK-NEXT: store i64 %indvars.iv.next, ptr %p, align 4, !dbg [[G5R1:!.*]]
+; CHECK-NEXT: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
+; CHECK-NEXT: store i64 %indvars.iv.next.1, ptr %p, align 4, !dbg [[G6R1:!.*]]
+; CHECK-NEXT: %indvars.iv.next.2 = add nuw nsw i64 %indvars.iv, 3
+; CHECK-NEXT: store i64 %indvars.iv.next.2, ptr %p, align 4, !dbg [[G7R1:!.*]]
+; CHECK-NEXT: %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 4
+
+; CHECK: [[G2R1]] = !DILocation({{.*}}, atomGroup: 2, atomRank: 1)
+; CHECK: [[G3R1]] = !DILocation({{.*}}, atomGroup: 3, atomRank: 1)
+; CHECK: [[G4R1]] = !DILocation({{.*}}, atomGroup: 4, atomRank: 1)
+; CHECK: [[G1R1]] = !DILocation({{.*}}, atomGroup: 1, atomRank: 1)
+; CHECK: [[G5R1]] = !DILocation({{.*}}, atomGroup: 5, atomRank: 1)
+; CHECK: [[G6R1]] = !DILocation({{.*}}, atomGroup: 6, atomRank: 1)
+; CHECK: [[G7R1]] = !DILocation({{.*}}, atomGroup: 7, atomRank: 1)
+
+define i32 @unroll(ptr %p, i32 %N) local_unnamed_addr !dbg !5 {
+entry:
+ %cmp9 = icmp eq i32 %N, 0
+ br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ %wide.trip.count = zext i32 %N to i64
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %r = phi i32 [ 0, %entry ], [ 1, %for.body ]
+ ret i32 %r
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ store i64 %indvars.iv, ptr %p, !dbg !8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!2, !3}
+!llvm.module.flags = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "test.ll", directory: "/")
+!2 = !{i32 17}
+!3 = !{i32 0}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "unroll", linkageName: "unroll", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !DILocation(line: 1, column: 1, scope: !5, atomGroup: 1, atomRank: 1)
diff --git a/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll.ll b/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll.ll
new file mode 100644
index 0000000000000..557e8387e5bc5
--- /dev/null
+++ b/llvm/test/DebugInfo/KeyInstructions/Generic/loop-unroll.ll
@@ -0,0 +1,48 @@
+; RUN: opt %s --passes=loop-unroll -S -o - -S | FileCheck %s
+
+; CHECK: store i32 %a, ptr %p, align 4, !dbg [[G1R1:!.*]]
+; CHECK: %v.1 = add i32 1, %a, !dbg [[G2R2:!.*]]
+; CHECK: store i32 %v.1, ptr %p, align 4, !dbg [[G2R1:!.*]]
+; CHECK: %v.2 = add i32 2, %a, !dbg [[G3R2:!.*]]
+; CHECK: store i32 %v.2, ptr %p, align 4, !dbg [[G3R1:!.*]]
+
+; CHECK: [[G1R1]] = !DILocation({{.*}}, atomGroup: 1, atomRank: 1)
+; CHECK: [[G2R2]] = !DILocation({{.*}}, atomGroup: 2, atomRank: 2)
+; CHECK: [[G2R1]] = !DILocation({{.*}}, atomGroup: 2, atomRank: 1)
+; CHECK: [[G3R2]] = !DILocation({{.*}}, atomGroup: 3, atomRank: 2)
+; CHECK: [[G3R1]] = !DILocation({{.*}}, atomGroup: 3, atomRank: 1)
+
+define void @f(ptr %p, i32 %a) local_unnamed_addr #0 !dbg !5 {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %v = add i32 %i, %a, !dbg !11
+ store i32 %v, ptr %p, align 4, !dbg !12
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %inc, 3
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare i32 @bar(...) local_unnamed_addr
+
+attributes #0 = { nounwind ssp uwtable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!2, !3}
+!llvm.module.flags = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "/home/och/scratch/test.ll", directory: "/")
+!2 = !{i32 8}
+!3 = !{i32 0}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "f", linkageName: "f", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!11 = !DILocation(line: 4, column: 1, scope: !5, atomGroup: 1, atomRank: 2)
+!12 = !DILocation(line: 5, column: 1, scope: !5, atomGroup: 1, atomRank: 1)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, but question inline
// Remap source location atom instance. Do this now, rather than | ||
// when we remap instructions, because remap is called once we've | ||
// cloned all blocks (all the clones would get the same atom | ||
// number). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I get why all the clones need to get distinct atom numbers; what is it here that ensures that's the case, don't we need to reset something in VMap to ensure that's the case? i.e., if we called RemapSourceAtom later what would be different?
(I get the overall picture, but not how it's achieved here.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CloneBasicBlock
maps the atoms from the cloned block so new ones can be remapped in the duplicated block (the next unroll iteration). It stores that info in VMap
. We need to update those instructions now using that info.
remapInstructionsInBlocks
(which does also call RemapSourceAtom
) which is called later for all cloned is passed LastValueMap
. That map just has the "running map" of new cloned instructions, i.e., values are overwritten after each unrolled iteration. If we used that infra to remap source atoms, we'd just remap all atoms x to y, in all the unrolled blocks. But what we want is x -> y1, x->y
2, with a unique value in each unrolled iteration.
Does that make any sense? I fear I'm mincing this explanation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah right -- it's the fact that Something Happens (TM) during CloneBasicBlock that I think I was missing.
4dda51f
to
3c02939
Compare
fc36dec
to
520151b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
// Remap source location atom instance. Do this now, rather than | ||
// when we remap instructions, because remap is called once we've | ||
// cloned all blocks (all the clones would get the same atom | ||
// number). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah right -- it's the fact that Something Happens (TM) during CloneBasicBlock that I think I was missing.
No description provided.