Skip to content

Commit 62d6fa8

Browse files
[SPIR-V] Support optimization of branching with analyzeBranch/removeBranch/insertBranch functions (#110653)
This PR fixes the implementation of `SPIRVInstrInfo::analyzeBranch()` and adds implementations of `SPIRVInstrInfo::removeBranch()` and `SPIRVInstrInfo::insertBranch()` to support the Branch Folding and If Conversion optimizations. The attached test case failed before this PR because report_fatal_error() fired on the missing implementation of `SPIRVInstrInfo::removeBranch()`. The new test case cannot pass the spirv-val check at the moment due to the issue described in #110652; this is unrelated to this PR. This PR also updates the instruction definitions in tablegen to set isBranch=1 for the relevant instructions.
1 parent a2359a8 commit 62d6fa8

File tree

3 files changed

+283
-31
lines changed

3 files changed

+283
-31
lines changed

llvm/lib/Target/SPIRV/SPIRVInstrInfo.cpp

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -184,26 +184,17 @@ bool SPIRVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
184184
MachineBasicBlock *&FBB,
185185
SmallVectorImpl<MachineOperand> &Cond,
186186
bool AllowModify) const {
187-
TBB = nullptr;
188-
FBB = nullptr;
189-
if (MBB.empty())
190-
return false;
191-
auto MI = MBB.getLastNonDebugInstr();
192-
if (!MI.isValid())
193-
return false;
194-
if (MI->getOpcode() == SPIRV::OpBranch) {
195-
TBB = MI->getOperand(0).getMBB();
196-
return false;
197-
} else if (MI->getOpcode() == SPIRV::OpBranchConditional) {
198-
Cond.push_back(MI->getOperand(0));
199-
TBB = MI->getOperand(1).getMBB();
200-
if (MI->getNumOperands() == 3) {
201-
FBB = MI->getOperand(2).getMBB();
202-
}
203-
return false;
204-
} else {
205-
return true;
206-
}
187+
// We do not allow to restructure blocks by results of analyzeBranch(),
188+
// because it may potentially break structured control flow and anyway
189+
// is unlikely to be useful in SPIR-V, for reasons such as, e.g.:
190+
// 1) there is no way to encode `if (Cond) then Stmt` logic, only full
191+
// if-then-else is supported by OpBranchConditional, so if we supported
192+
// splitting of blocks ending with OpBranchConditional MachineBasicBlock.cpp
193+
// would expect successful implementation of calls to insertBranch() setting
194+
// FBB to null that is not feasible; 2) it's not possible to delete
195+
// instructions after the unconditional branch, because this instruction must
196+
// be the last instruction in a block.
197+
return true;
207198
}
208199

209200
// Remove the branching code at the end of the specific MBB.
@@ -212,9 +203,16 @@ bool SPIRVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
212203
// If \p BytesRemoved is non-null, report the change in code size from the
213204
// removed instructions.
214205
unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB,
215-
int *BytesRemoved) const {
216-
report_fatal_error("Branch removal not supported, as MBB info not propagated"
217-
" to OpPhi instructions. Try using -O0 instead.");
206+
int * /*BytesRemoved*/) const {
207+
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
208+
if (I == MBB.end())
209+
return 0;
210+
211+
if (I->getOpcode() == SPIRV::OpBranch) {
212+
I->eraseFromParent();
213+
return 1;
214+
}
215+
return 0;
218216
}
219217

220218
// Insert branch code into the end of the specified MachineBasicBlock. The
@@ -230,12 +228,16 @@ unsigned SPIRVInstrInfo::removeBranch(MachineBasicBlock &MBB,
230228
//
231229
// The CFG information in MBB.Predecessors and MBB.Successors must be valid
232230
// before calling this function.
233-
unsigned SPIRVInstrInfo::insertBranch(
234-
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
235-
ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
236-
report_fatal_error("Branch insertion not supported, as MBB info not "
237-
"propagated to OpPhi instructions. Try using "
238-
"-O0 instead.");
231+
unsigned SPIRVInstrInfo::insertBranch(MachineBasicBlock &MBB,
232+
MachineBasicBlock *TBB,
233+
MachineBasicBlock *FBB,
234+
ArrayRef<MachineOperand> Cond,
235+
const DebugLoc &DL,
236+
int * /*BytesAdded*/) const {
237+
if (!TBB)
238+
return 0;
239+
BuildMI(&MBB, DL, get(SPIRV::OpBranch)).addMBB(TBB);
240+
return 1;
239241
}
240242

241243
void SPIRVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,

llvm/lib/Target/SPIRV/SPIRVInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -622,13 +622,13 @@ def OpLoopMerge: Op<246, (outs), (ins unknown:$merge, unknown:$continue, LoopCon
622622
def OpSelectionMerge: Op<247, (outs), (ins unknown:$merge, SelectionControl:$sc),
623623
"OpSelectionMerge $merge $sc">;
624624
def OpLabel: Op<248, (outs ID:$label), (ins), "$label = OpLabel">;
625-
let isBarrier = 1, isTerminator=1 in {
625+
let isBarrier = 1, isTerminator = 1, isBranch = 1 in {
626626
def OpBranch: Op<249, (outs), (ins unknown:$label), "OpBranch $label">;
627627
def OpBranchConditional: Op<250, (outs), (ins ID:$cond, unknown:$true, unknown:$false, variable_ops),
628628
"OpBranchConditional $cond $true $false">;
629629
def OpSwitch: Op<251, (outs), (ins ID:$sel, ID:$dflt, variable_ops), "OpSwitch $sel $dflt">;
630630
}
631-
let isReturn = 1, hasDelaySlot=0, isBarrier = 0, isTerminator=1, isNotDuplicable = 1 in {
631+
let isReturn = 1, hasDelaySlot = 0, isBarrier = 0, isTerminator = 1, isNotDuplicable = 1 in {
632632
def OpKill: SimpleOp<"OpKill", 252>;
633633
def OpReturn: SimpleOp<"OpReturn", 253>;
634634
def OpReturnValue: Op<254, (outs), (ins ID:$ret), "OpReturnValue $ret">;
Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
; The goal of this test case is to ensure that translation does not crash when, during branch
2+
; optimization, analyzeBranch() requires the helper methods removeBranch() and insertBranch()
3+
; to manage subsequent operations.
4+
5+
; RUN: llc -O0 -mtriple=spirv64-unknown-linux %s -o - | FileCheck %s
6+
; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
7+
8+
; RUN: llc -O0 -mtriple=spirv32-unknown-linux %s -o - | FileCheck %s
9+
; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
10+
11+
; CHECK: OpFunction
12+
13+
%struct = type { %arr }
14+
%arr = type { [3 x i64] }
15+
16+
@.str.6 = private unnamed_addr addrspace(1) constant [3 x i8] c", \00", align 1
17+
@.str.20 = private unnamed_addr addrspace(1) constant [6 x i8] c"item(\00", align 1
18+
@.str.21 = private unnamed_addr addrspace(1) constant [8 x i8] c"range: \00", align 1
19+
@.str.22 = private unnamed_addr addrspace(1) constant [7 x i8] c", id: \00", align 1
20+
21+
define spir_func i32 @foo(ptr addrspace(4) %Buf, ptr addrspace(4) %Item) {
22+
entry:
23+
%ref.tmp = alloca %struct
24+
%ref.tmp7 = alloca %struct
25+
br label %for.cond.i
26+
27+
for.cond.i: ; preds = %for.inc.i, %entry
28+
%Len.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.inc.i ]
29+
%idxprom.i = zext i32 %Len.0.i to i64
30+
%arrayidx.i = getelementptr inbounds i8, ptr addrspace(1) @.str.20, i64 %idxprom.i
31+
%0 = load i8, ptr addrspace(1) %arrayidx.i
32+
%cmp.not.i = icmp eq i8 %0, 0
33+
br i1 %cmp.not.i, label %for.cond1.i, label %for.inc.i
34+
35+
for.inc.i: ; preds = %for.cond.i
36+
%inc.i = add i32 %Len.0.i, 1
37+
br label %for.cond.i, !llvm.loop !1
38+
39+
for.cond1.i: ; preds = %for.body3.i, %for.cond.i
40+
%I.0.i = phi i32 [ %inc9.i, %for.body3.i ], [ 0, %for.cond.i ]
41+
%cmp2.i = icmp ult i32 %I.0.i, %Len.0.i
42+
br i1 %cmp2.i, label %for.body3.i, label %for.cond.preheader
43+
44+
for.cond.preheader: ; preds = %for.cond1.i
45+
%MIndex.i = getelementptr inbounds i8, ptr addrspace(4) %Item, i64 24
46+
br label %for.cond
47+
48+
for.body3.i: ; preds = %for.cond1.i
49+
%idxprom4.i = zext i32 %I.0.i to i64
50+
%arrayidx5.i = getelementptr inbounds i8, ptr addrspace(1) @.str.20, i64 %idxprom4.i
51+
%1 = load i8, ptr addrspace(1) %arrayidx5.i
52+
%arrayidx7.i = getelementptr inbounds i8, ptr addrspace(4) %Buf, i64 %idxprom4.i
53+
store i8 %1, ptr addrspace(4) %arrayidx7.i
54+
%inc9.i = add nuw i32 %I.0.i, 1
55+
br label %for.cond1.i, !llvm.loop !2
56+
57+
for.cond: ; preds = %exit, %for.cond.preheader
58+
%Len.0 = phi i32 [ %add9, %exit ], [ %Len.0.i, %for.cond.preheader ]
59+
%I.0 = phi i32 [ %inc, %exit ], [ 0, %for.cond.preheader ]
60+
%cmp = icmp ult i32 %I.0, 2
61+
br i1 %cmp, label %for.body, label %for.cond.cleanup
62+
63+
for.cond.cleanup: ; preds = %for.cond
64+
%inc10 = add i32 %Len.0, 1
65+
%idxprom = zext i32 %Len.0 to i64
66+
%arrayidx = getelementptr inbounds i8, ptr addrspace(4) %Buf, i64 %idxprom
67+
store i8 41, ptr addrspace(4) %arrayidx
68+
ret i32 %inc10
69+
70+
for.body: ; preds = %for.cond
71+
%idx.ext = zext i32 %Len.0 to i64
72+
%add.ptr = getelementptr inbounds i8, ptr addrspace(4) %Buf, i64 %idx.ext
73+
%cmp1 = icmp eq i32 %I.0, 0
74+
%cond = select i1 %cmp1, ptr addrspace(1) @.str.21, ptr addrspace(1) @.str.22
75+
br label %for.cond.i25
76+
77+
for.cond.i25: ; preds = %for.inc.i30, %for.body
78+
%Len.0.i26 = phi i32 [ 0, %for.body ], [ %inc.i31, %for.inc.i30 ]
79+
%idxprom.i27 = zext i32 %Len.0.i26 to i64
80+
%arrayidx.i28 = getelementptr inbounds i8, ptr addrspace(1) %cond, i64 %idxprom.i27
81+
%2 = load i8, ptr addrspace(1) %arrayidx.i28
82+
%cmp.not.i29 = icmp eq i8 %2, 0
83+
br i1 %cmp.not.i29, label %for.cond1.i33, label %for.inc.i30
84+
85+
for.inc.i30: ; preds = %for.cond.i25
86+
%inc.i31 = add i32 %Len.0.i26, 1
87+
br label %for.cond.i25, !llvm.loop !1
88+
89+
for.cond1.i33: ; preds = %for.body3.i36, %for.cond.i25
90+
%I.0.i34 = phi i32 [ %inc9.i40, %for.body3.i36 ], [ 0, %for.cond.i25 ]
91+
%cmp2.i35 = icmp ult i32 %I.0.i34, %Len.0.i26
92+
br i1 %cmp2.i35, label %for.body3.i36, label %detail.exit
93+
94+
for.body3.i36: ; preds = %for.cond1.i33
95+
%idxprom4.i37 = zext i32 %I.0.i34 to i64
96+
%arrayidx5.i38 = getelementptr inbounds i8, ptr addrspace(1) %cond, i64 %idxprom4.i37
97+
%3 = load i8, ptr addrspace(1) %arrayidx5.i38
98+
%arrayidx7.i39 = getelementptr inbounds i8, ptr addrspace(4) %add.ptr, i64 %idxprom4.i37
99+
store i8 %3, ptr addrspace(4) %arrayidx7.i39
100+
%inc9.i40 = add nuw i32 %I.0.i34, 1
101+
br label %for.cond1.i33, !llvm.loop !2
102+
103+
detail.exit: ; preds = %for.cond1.i33
104+
%add3 = add i32 %Len.0, %Len.0.i26
105+
%idx.ext4 = zext i32 %add3 to i64
106+
%add.ptr5 = getelementptr inbounds i8, ptr addrspace(4) %Buf, i64 %idx.ext4
107+
br i1 %cmp1, label %cond.true, label %cond.false
108+
109+
cond.true: ; preds = %detail.exit
110+
call void @llvm.memcpy.p0.p4.i64(ptr align 8 %ref.tmp7, ptr addrspace(4) align 8 %Item, i64 24, i1 false)
111+
call void @llvm.memset.p0.i64(ptr align 8 %ref.tmp, i8 0, i64 24, i1 false)
112+
br label %for.cond.i42
113+
114+
for.cond.i42: ; preds = %for.body.i, %cond.true
115+
%i.0.i = phi i32 [ 0, %cond.true ], [ %inc.i45, %for.body.i ]
116+
%cmp.i = icmp ult i32 %i.0.i, 3
117+
br i1 %cmp.i, label %for.body.i, label %cond.end
118+
119+
for.body.i: ; preds = %for.cond.i42
120+
%idxprom.i43 = zext nneg i32 %i.0.i to i64
121+
%arrayidx.i44 = getelementptr inbounds [3 x i64], ptr %ref.tmp7, i64 0, i64 %idxprom.i43
122+
%4 = load i64, ptr %arrayidx.i44, align 8
123+
%arrayidx.i.i = getelementptr inbounds [3 x i64], ptr %ref.tmp, i64 0, i64 %idxprom.i43
124+
store i64 %4, ptr %arrayidx.i.i, align 8
125+
%inc.i45 = add nuw nsw i32 %i.0.i, 1
126+
br label %for.cond.i42, !llvm.loop !3
127+
128+
cond.false: ; preds = %detail.exit
129+
call void @llvm.memcpy.p0.p4.i64(ptr align 8 %ref.tmp, ptr addrspace(4) align 8 %MIndex.i, i64 24, i1 false)
130+
br label %cond.end
131+
132+
cond.end: ; preds = %cond.false, %for.cond.i42
133+
store i8 123, ptr addrspace(4) %add.ptr5
134+
br label %for.cond.i46
135+
136+
for.cond.i46: ; preds = %for.inc.i52, %cond.end
137+
%Len.0.i47 = phi i32 [ 1, %cond.end ], [ %Len.1.i, %for.inc.i52 ]
138+
%I.0.i48 = phi i32 [ 0, %cond.end ], [ %inc7.i, %for.inc.i52 ]
139+
%cmp.i49 = icmp ult i32 %I.0.i48, 3
140+
br i1 %cmp.i49, label %for.body.i50, label %exit
141+
142+
for.body.i50: ; preds = %for.cond.i46
143+
%idxprom.i.i = zext nneg i32 %I.0.i48 to i64
144+
%arrayidx.i.i51 = getelementptr inbounds [3 x i64], ptr %ref.tmp, i64 0, i64 %idxprom.i.i
145+
%5 = load i64, ptr %arrayidx.i.i51, align 8
146+
%idx.ext.i = zext i32 %Len.0.i47 to i64
147+
%add.ptr.i = getelementptr inbounds i8, ptr addrspace(4) %add.ptr5, i64 %idx.ext.i
148+
br label %do.body.i.i.i
149+
150+
do.body.i.i.i: ; preds = %do.body.i.i.i, %for.body.i50
151+
%Val.addr.0.i.i.i = phi i64 [ %5, %for.body.i50 ], [ %div.i.i.i, %do.body.i.i.i ]
152+
%NumDigits.0.i.i.i = phi i32 [ 0, %for.body.i50 ], [ %inc.i.i.i, %do.body.i.i.i ]
153+
%Val.addr.0.i.i.i.frozen = freeze i64 %Val.addr.0.i.i.i
154+
%div.i.i.i = udiv i64 %Val.addr.0.i.i.i.frozen, 10
155+
%6 = mul i64 %div.i.i.i, 10
156+
%rem.i.i.i.decomposed = sub i64 %Val.addr.0.i.i.i.frozen, %6
157+
%7 = trunc i64 %rem.i.i.i.decomposed to i8
158+
%retval.0.i.i.i.i = or disjoint i8 %7, 48
159+
%inc.i.i.i = add i32 %NumDigits.0.i.i.i, 1
160+
%idxprom.i.i.i = zext i32 %NumDigits.0.i.i.i to i64
161+
%arrayidx.i.i.i = getelementptr inbounds i8, ptr addrspace(4) %add.ptr.i, i64 %idxprom.i.i.i
162+
store i8 %retval.0.i.i.i.i, ptr addrspace(4) %arrayidx.i.i.i
163+
%tobool.not.i.i.i = icmp ult i64 %Val.addr.0.i.i.i, 10
164+
br i1 %tobool.not.i.i.i, label %while.cond.i.i.i, label %do.body.i.i.i, !llvm.loop !4
165+
166+
while.cond.i.i.i: ; preds = %while.body.i.i.i, %do.body.i.i.i
167+
%J.0.i.i.i = phi i32 [ %inc.i54.i.i, %while.body.i.i.i ], [ 0, %do.body.i.i.i ]
168+
%I.0.in.i.i.i = phi i32 [ %I.0.i.i.i, %while.body.i.i.i ], [ %inc.i.i.i, %do.body.i.i.i ]
169+
%I.0.i.i.i = add i32 %I.0.in.i.i.i, -1
170+
%cmp.i.i.i = icmp sgt i32 %I.0.i.i.i, %J.0.i.i.i
171+
br i1 %cmp.i.i.i, label %while.body.i.i.i, label %enable.exit
172+
173+
while.body.i.i.i: ; preds = %while.cond.i.i.i
174+
%idxprom.i52.i.i = sext i32 %I.0.i.i.i to i64
175+
%arrayidx.i53.i.i = getelementptr inbounds i8, ptr addrspace(4) %add.ptr.i, i64 %idxprom.i52.i.i
176+
%8 = load i8, ptr addrspace(4) %arrayidx.i53.i.i
177+
%idxprom1.i.i.i = zext nneg i32 %J.0.i.i.i to i64
178+
%arrayidx2.i.i.i = getelementptr inbounds i8, ptr addrspace(4) %add.ptr.i, i64 %idxprom1.i.i.i
179+
%9 = load i8, ptr addrspace(4) %arrayidx2.i.i.i
180+
store i8 %9, ptr addrspace(4) %arrayidx.i53.i.i
181+
store i8 %8, ptr addrspace(4) %arrayidx2.i.i.i
182+
%inc.i54.i.i = add nuw nsw i32 %J.0.i.i.i, 1
183+
br label %while.cond.i.i.i, !llvm.loop !5
184+
185+
enable.exit: ; preds = %while.cond.i.i.i
186+
%add.i = add i32 %Len.0.i47, %inc.i.i.i
187+
%cmp2.not.i = icmp eq i32 %I.0.i48, 2
188+
br i1 %cmp2.not.i, label %for.inc.i52, label %if.then.i
189+
190+
if.then.i: ; preds = %enable.exit
191+
%idx.ext3.i = zext i32 %add.i to i64
192+
%add.ptr4.i = getelementptr inbounds i8, ptr addrspace(4) %add.ptr5, i64 %idx.ext3.i
193+
br label %for.cond.i.i
194+
195+
for.cond.i.i: ; preds = %for.inc.i.i, %if.then.i
196+
%Len.0.i.i = phi i32 [ 0, %if.then.i ], [ %inc.i.i, %for.inc.i.i ]
197+
%idxprom.i24.i = zext i32 %Len.0.i.i to i64
198+
%arrayidx.i25.i = getelementptr inbounds i8, ptr addrspace(1) @.str.6, i64 %idxprom.i24.i
199+
%10 = load i8, ptr addrspace(1) %arrayidx.i25.i
200+
%cmp.not.i.i = icmp eq i8 %10, 0
201+
br i1 %cmp.not.i.i, label %for.cond1.i.i, label %for.inc.i.i
202+
203+
for.inc.i.i: ; preds = %for.cond.i.i
204+
%inc.i.i = add i32 %Len.0.i.i, 1
205+
br label %for.cond.i.i, !llvm.loop !1
206+
207+
for.cond1.i.i: ; preds = %for.body3.i.i, %for.cond.i.i
208+
%I.0.i.i = phi i32 [ %inc9.i.i, %for.body3.i.i ], [ 0, %for.cond.i.i ]
209+
%cmp2.i.i = icmp ult i32 %I.0.i.i, %Len.0.i.i
210+
br i1 %cmp2.i.i, label %for.body3.i.i, label %append.exit
211+
212+
for.body3.i.i: ; preds = %for.cond1.i.i
213+
%idxprom4.i.i = zext i32 %I.0.i.i to i64
214+
%arrayidx5.i.i = getelementptr inbounds i8, ptr addrspace(1) @.str.6, i64 %idxprom4.i.i
215+
%11 = load i8, ptr addrspace(1) %arrayidx5.i.i
216+
%arrayidx7.i.i = getelementptr inbounds i8, ptr addrspace(4) %add.ptr4.i, i64 %idxprom4.i.i
217+
store i8 %11, ptr addrspace(4) %arrayidx7.i.i
218+
%inc9.i.i = add nuw i32 %I.0.i.i, 1
219+
br label %for.cond1.i.i, !llvm.loop !2
220+
221+
append.exit: ; preds = %for.cond1.i.i
222+
%add6.i = add i32 %add.i, %Len.0.i.i
223+
br label %for.inc.i52
224+
225+
for.inc.i52: ; preds = %append.exit, %enable.exit
226+
%Len.1.i = phi i32 [ %add6.i, %append.exit ], [ %add.i, %enable.exit ]
227+
%inc7.i = add nuw nsw i32 %I.0.i48, 1
228+
br label %for.cond.i46, !llvm.loop !6
229+
230+
exit: ; preds = %for.cond.i46
231+
%inc8.i = add i32 %Len.0.i47, 1
232+
%idxprom9.i = zext i32 %Len.0.i47 to i64
233+
%arrayidx10.i = getelementptr inbounds i8, ptr addrspace(4) %add.ptr5, i64 %idxprom9.i
234+
store i8 125, ptr addrspace(4) %arrayidx10.i
235+
%add9 = add i32 %add3, %inc8.i
236+
%inc = add nuw nsw i32 %I.0, 1
237+
br label %for.cond, !llvm.loop !7
238+
}
239+
240+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
241+
declare void @llvm.memcpy.p0.p4.i64(ptr noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg)
242+
243+
!0 = !{!"llvm.loop.mustprogress"}
244+
!1 = distinct !{!1, !0}
245+
!2 = distinct !{!2, !0}
246+
!3 = distinct !{!3, !0}
247+
!4 = distinct !{!4, !0}
248+
!5 = distinct !{!5, !0}
249+
!6 = distinct !{!6, !0}
250+
!7 = distinct !{!7, !0}

0 commit comments

Comments
 (0)