Skip to content

Commit 807cd7b

Browse files
committed
[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison
Deal with these possibly poison-generating operations in SelectionDAG::canCreateUndefOrPoison: EXTRACT_VECTOR_ELT, SRL, SRA. Also handle these operations that only propagate poison/undef based on the input operands: SADDSAT, UADDSAT, SSUBSAT, USUBSAT, MULHU, MULHS, SMIN, SMAX, UMIN, UMAX. Also handle the integer comparison variant of SELECT_CC. The goal here is to allow pushing freeze through these operations when allowed, as well as to let analyses such as isGuaranteedNotToBeUndefOrPoison not break on such operations.
1 parent 4d0f79e commit 807cd7b

14 files changed

+659
-669
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15383,6 +15383,12 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1538315383
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
1538415384
return N0;
1538515385

15386+
// There is a reverse transform in visitEXTRACT_VECTOR_ELT, so we need to
15387+
// avoid infinite looping by not pulling freeze through EXTRACT_VECTOR_ELT
15388+
// here.
15389+
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT)
15390+
return SDValue();
15391+
1538615392
// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
1538715393
// Try to push freeze through instructions that propagate but don't produce
1538815394
// poison as far as possible. If an operand of freeze follows three

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5073,6 +5073,16 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
50735073
case ISD::FREEZE:
50745074
case ISD::CONCAT_VECTORS:
50755075
case ISD::INSERT_SUBVECTOR:
5076+
case ISD::SADDSAT:
5077+
case ISD::UADDSAT:
5078+
case ISD::SSUBSAT:
5079+
case ISD::USUBSAT:
5080+
case ISD::MULHU:
5081+
case ISD::MULHS:
5082+
case ISD::SMIN:
5083+
case ISD::SMAX:
5084+
case ISD::UMIN:
5085+
case ISD::UMAX:
50765086
case ISD::AND:
50775087
case ISD::XOR:
50785088
case ISD::ROTL:
@@ -5093,6 +5103,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
50935103
case ISD::BUILD_PAIR:
50945104
return false;
50955105

5106+
case ISD::SELECT_CC:
50965107
case ISD::SETCC: {
50975108
// Integer setcc cannot create undef or poison.
50985109
if (Op.getOperand(0).getValueType().isInteger())
@@ -5102,7 +5113,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51025113
// based on options and flags. The options and flags also cause special
51035114
// nonan condition codes to be used. Those condition codes may be preserved
51045115
// even if the nonan flag is dropped somewhere.
5105-
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5116+
unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4;
5117+
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get();
51065118
if (((unsigned)CCCode & 0x10U))
51075119
return true;
51085120

@@ -5123,23 +5135,36 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
51235135
return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
51245136
Op->getFlags().hasNoUnsignedWrap());
51255137

5126-
case ISD::SHL:
5138+
case ISD::SHL: {
51275139
// If the max shift amount isn't in range, then the shift can create poison.
51285140
if (!getValidMaximumShiftAmountConstant(Op, DemandedElts))
51295141
return true;
51305142

51315143
// Matches hasPoisonGeneratingFlags().
51325144
return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
51335145
Op->getFlags().hasNoUnsignedWrap());
5146+
}
5147+
5148+
case ISD::SRL:
5149+
case ISD::SRA: {
5150+
// If the max shift amount isn't in range, then the shift can create poison.
5151+
if (!getValidMaximumShiftAmountConstant(Op, DemandedElts))
5152+
return true;
5153+
5154+
// Matches hasPoisonGeneratingFlags().
5155+
return ConsiderFlags && Op->getFlags().hasExact();
5156+
}
51345157

51355158
// Matches hasPoisonGeneratingFlags().
51365159
case ISD::OR:
51375160
return ConsiderFlags && Op->getFlags().hasDisjoint();
51385161

5162+
case ISD::EXTRACT_VECTOR_ELT:
51395163
case ISD::INSERT_VECTOR_ELT:{
51405164
// Ensure that the element index is in bounds.
51415165
EVT VecVT = Op.getOperand(0).getValueType();
5142-
KnownBits KnownIdx = computeKnownBits(Op.getOperand(2), Depth + 1);
5166+
unsigned IdxOp = Opcode == ISD::INSERT_VECTOR_ELT ? 2 : 1;
5167+
KnownBits KnownIdx = computeKnownBits(Op.getOperand(IdxOp), Depth + 1);
51435168
return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
51445169
}
51455170

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 53 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -283,21 +283,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
283283
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
284284
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v1
285285
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
286-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v16
286+
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
287287
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
288-
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14
288+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v14
289289
; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, v9, v4
290-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
291-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
292290
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
291+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
292+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
293293
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
294294
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
295-
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
296-
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
295+
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
296+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
297297
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
298298
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
299-
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
300-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v1
299+
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
300+
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v1
301301
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
302302
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
303303
; GFX9-O0-NEXT: v_xor_b32_e64 v1, v5, v1
@@ -313,21 +313,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
313313
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
314314
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
315315
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
316-
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
317-
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v11
316+
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
317+
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
318318
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
319319
; GFX9-O0-NEXT: v_sub_co_u32_e32 v1, vcc, v1, v3
320-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
321-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8, v3, vcc
322-
; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
320+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v8, v5, vcc
321+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v7, v3, vcc
322+
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v2, v5, vcc
323323
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
324324
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
325-
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
326-
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
325+
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
326+
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
327327
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
328328
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
329-
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
330-
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7
329+
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
330+
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
331331
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, v6
332332
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v4
333333
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
@@ -340,18 +340,26 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
340340
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
341341
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
342342
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
343-
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
343+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
344+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
345+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
344346
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
345-
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
346-
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
347+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
348+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
349+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
350+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
347351
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
348-
; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
349-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
352+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
353+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
354+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
355+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
350356
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
351-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
352-
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
357+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
358+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
359+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
360+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
353361
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
354-
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
362+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
355363
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
356364
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
357365
; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
@@ -404,7 +412,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
404412
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
405413
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
406414
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
407-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[6:7]
415+
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
416+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
408417
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
409418
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
410419
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
@@ -440,7 +449,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
440449
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
441450
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
442451
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
443-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[6:7]
452+
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
453+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
444454
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
445455
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
446456
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
@@ -691,10 +701,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
691701
; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
692702
; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
693703
; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
694-
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
695-
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
696-
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
697-
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
704+
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
705+
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
706+
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
707+
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
698708
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
699709
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
700710
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
@@ -904,14 +914,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
904914
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
905915
; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
906916
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
907-
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
908-
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
909-
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
910-
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
911-
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
912-
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
913-
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
914-
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
917+
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
918+
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
919+
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
920+
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
921+
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
922+
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
923+
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
924+
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
915925
; GFX9-O0-NEXT: s_waitcnt vmcnt(9)
916926
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
917927
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
@@ -1029,10 +1039,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10291039
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
10301040
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
10311041
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
1032-
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1033-
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1034-
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1035-
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1042+
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1043+
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1044+
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1045+
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
10361046
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
10371047
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10381048
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload

0 commit comments

Comments
 (0)