Skip to content

Commit 5d9c717

Browse files
authored
[GISel] Fold shifts to constant result. (llvm#123510)
This resolves llvm#123212
1 parent 8294459 commit 5d9c717

File tree

13 files changed

+756
-614
lines changed

13 files changed

+756
-614
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -840,8 +840,10 @@ class CombinerHelper {
840840
bool matchRedundantBinOpInEquality(MachineInstr &MI,
841841
BuildFnTy &MatchInfo) const;
842842

843-
/// Match shifts greater or equal to the bitwidth of the operation.
844-
bool matchShiftsTooBig(MachineInstr &MI) const;
843+
/// Match shifts greater than or equal to the range (the bitwidth of the result
844+
/// datatype, or the effective bitwidth of the source value).
845+
bool matchShiftsTooBig(MachineInstr &MI,
846+
std::optional<int64_t> &MatchInfo) const;
845847

846848
/// Match constant LHS ops that should be commuted.
847849
bool matchCommuteConstantToRHS(MachineInstr &MI) const;

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,23 @@ def ptr_add_immed_chain : GICombineRule<
306306
[{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
307307
(apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;
308308

309+
// Pattern fragment with one alternative per generic shift opcode (G_SHL,
// G_ASHR, G_LSHR). Each alternative has the form (op $dst, $shifted, $amt) and
// exposes the shift result $dst as the fragment's root output.
def shift_const_op : GICombinePatFrag<
310+
(outs root:$dst), (ins),
311+
!foreach(op,
312+
[G_SHL, G_ASHR, G_LSHR],
313+
(pattern (op $dst, $shifted, $amt)))>;
314+
// Match data handed from the match step to the apply step of shifts_too_big:
// an optional 64-bit constant.
def shift_result_matchdata : GIDefMatchData<"std::optional<int64_t>">;
309315
// Combine rule for shifts accepted by Helper.matchShiftsTooBig. In the updated
// form below, the apply step replaces the shift with the constant held in the
// match data when one is present, and with undef otherwise. The removed form
// always replaced the shift with undef.
def shifts_too_big : GICombineRule<
310-
(defs root:$root),
311-
(match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root,
312-
[{ return Helper.matchShiftsTooBig(*${root}); }]),
313-
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
316+
(defs root:$root, shift_result_matchdata:$matchinfo),
317+
(match (shift_const_op $root):$mi,
318+
[{ return Helper.matchShiftsTooBig(*${mi}, ${matchinfo}); }]),
319+
(apply [{
320+
if (${matchinfo}) {
321+
Helper.replaceInstWithConstant(*${mi}, *${matchinfo});
322+
} else {
323+
Helper.replaceInstWithUndef(*${mi});
324+
}
325+
}])>;
314326

315327
// Fold shift (shift base x), y -> shift base, (x+y), if shifts are same
316328
def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">;

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6590,12 +6590,57 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
65906590
return CmpInst::isEquality(Pred) && Y.isValid();
65916591
}
65926592

6593-
bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) const {
6593+
/// Return the minimum useless shift amount that results in complete loss of the
6594+
/// source value. Return std::nullopt when it cannot determine a value.
6595+
static std::optional<unsigned>
6596+
getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
6597+
std::optional<int64_t> &Result) {
6598+
assert(Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
6599+
Opcode == TargetOpcode::G_ASHR && "Expect G_SHL, G_LSHR or G_ASHR.");
6600+
auto SignificantBits = 0;
6601+
switch (Opcode) {
6602+
case TargetOpcode::G_SHL:
6603+
SignificantBits = ValueKB.countMinTrailingZeros();
6604+
Result = 0;
6605+
break;
6606+
case TargetOpcode::G_LSHR:
6607+
Result = 0;
6608+
SignificantBits = ValueKB.countMinLeadingZeros();
6609+
break;
6610+
case TargetOpcode::G_ASHR:
6611+
if (ValueKB.isNonNegative()) {
6612+
SignificantBits = ValueKB.countMinLeadingZeros();
6613+
Result = 0;
6614+
} else if (ValueKB.isNegative()) {
6615+
SignificantBits = ValueKB.countMinLeadingOnes();
6616+
Result = -1;
6617+
} else {
6618+
// Cannot determine shift result.
6619+
Result = std::nullopt;
6620+
}
6621+
break;
6622+
default:
6623+
break;
6624+
}
6625+
return ValueKB.getBitWidth() - SignificantBits;
6626+
}
6627+
6628+
/// Match a shift whose constant amount (a scalar, or every element of a
/// constant vector) is large enough that the result no longer depends on the
/// shifted value.
///
/// \param MI         The shift instruction; operand 1 is the shifted value and
///                   operand 2 the shift amount.
/// \param MatchInfo  On a successful match, the constant the shift folds to,
///                   or std::nullopt when every shift amount is greater than
///                   or equal to the scalar bit width of the result type.
/// \returns true if every shift amount is too big.
bool CombinerHelper::matchShiftsTooBig(
    MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
  Register ShiftVal = MI.getOperand(1).getReg();
  Register ShiftReg = MI.getOperand(2).getReg();
  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());

  // Start from "no constant". Only lanes that fold to a constant may write
  // MatchInfo, so the outcome does not depend on the order in which the
  // predicate visits vector elements.
  MatchInfo = std::nullopt;

  auto IsShiftTooBig = [&](const Constant *C) {
    auto *CI = dyn_cast<ConstantInt>(C);
    if (!CI)
      return false;

    // Amount >= bit width: this lane matches but must not discard a constant
    // that another lane has already established.
    if (CI->uge(ResTy.getScalarSizeInBits()))
      return true;

    std::optional<int64_t> LaneResult;
    auto OptMinUselessShift = getMinUselessShift(KB->getKnownBits(ShiftVal),
                                                 MI.getOpcode(), LaneResult);
    // Reject the lane if no threshold or constant could be determined, or if
    // the amount is below the threshold.
    if (!OptMinUselessShift || !LaneResult || !CI->uge(*OptMinUselessShift))
      return false;

    MatchInfo = LaneResult;
    return true;
  };
  return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
}
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck %s
3+
4+
---
5+
name: combine_ashr
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $vgpr0, $vgpr1, $vgpr31
10+
11+
liveins: $vgpr0, $vgpr1
12+
13+
; CHECK-LABEL: name: combine_ashr
14+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
17+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
18+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
19+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
20+
; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
21+
; CHECK-NEXT: SI_RETURN
22+
%0:_(s32) = COPY $vgpr0
23+
%1:_(s32) = COPY $vgpr1
24+
%2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
25+
%3:_(s32) = G_CONSTANT i32 10
26+
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
27+
%5:_(s32) = G_ASHR %4, %3(s32)
28+
G_STORE %5(s32), %2(p0) :: (store (s32))
29+
SI_RETURN
30+
31+
...
32+
---
33+
name: combine_lshr
34+
tracksRegLiveness: true
35+
body: |
36+
bb.0:
37+
liveins: $vgpr0, $vgpr1, $vgpr31
38+
39+
liveins: $vgpr0, $vgpr1
40+
41+
; CHECK-LABEL: name: combine_lshr
42+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
43+
; CHECK-NEXT: {{ $}}
44+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
45+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
46+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
47+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
48+
; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
49+
; CHECK-NEXT: SI_RETURN
50+
%0:_(s32) = COPY $vgpr0
51+
%1:_(s32) = COPY $vgpr1
52+
%2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
53+
%3:_(s32) = G_CONSTANT i32 10
54+
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
55+
%5:_(s32) = G_LSHR %4, %3(s32)
56+
G_STORE %5(s32), %2(p0) :: (store (s32))
57+
SI_RETURN
58+
59+
...
60+
---
61+
name: combine_shl
62+
tracksRegLiveness: true
63+
body: |
64+
bb.0:
65+
liveins: $vgpr0, $vgpr1, $vgpr31
66+
67+
liveins: $vgpr0, $vgpr1
68+
69+
; CHECK-LABEL: name: combine_shl
70+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
71+
; CHECK-NEXT: {{ $}}
72+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
73+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
74+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
75+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
76+
; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
77+
; CHECK-NEXT: SI_RETURN
78+
%0:_(s32) = COPY $vgpr0
79+
%1:_(s32) = COPY $vgpr1
80+
%2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
81+
%3:_(s32) = G_CONSTANT i32 16
82+
%4:_(s32) = G_CONSTANT i32 4294901760
83+
%5:_(s32) = G_SHL %4, %3(s32)
84+
G_STORE %5(s32), %2(p0) :: (store (s32))
85+
SI_RETURN
86+
87+
...
88+
---
89+
name: combine_ashr2
90+
tracksRegLiveness: true
91+
body: |
92+
bb.0:
93+
liveins: $vgpr0, $vgpr1, $vgpr31
94+
95+
liveins: $vgpr0, $vgpr1
96+
97+
; CHECK-LABEL: name: combine_ashr2
98+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
101+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
102+
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
103+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
104+
; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p0) :: (store (s8))
105+
; CHECK-NEXT: SI_RETURN
106+
%0:_(s32) = COPY $vgpr0
107+
%1:_(s32) = COPY $vgpr1
108+
%2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
109+
%3:_(s32) = G_CONSTANT i32 1
110+
%4:_(s8) = G_CONSTANT i8 -2
111+
%5:_(s8) = G_ASHR %4, %3(s32)
112+
G_STORE %5(s8), %2(p0) :: (store (s8))
113+
SI_RETURN
114+
115+
...
116+
---
117+
name: combine_vector_lshr
118+
tracksRegLiveness: true
119+
body: |
120+
bb.0:
121+
liveins: $vgpr0, $vgpr1, $vgpr31
122+
123+
liveins: $vgpr0, $vgpr1
124+
125+
; CHECK-LABEL: name: combine_vector_lshr
126+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
127+
; CHECK-NEXT: {{ $}}
128+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
129+
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
130+
; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
131+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
132+
%0:_(<2 x s32>) = G_IMPLICIT_DEF
133+
%1:_(s32) = G_CONSTANT i32 511
134+
%2:_(s32) = G_CONSTANT i32 0
135+
%3:_(s32) = G_CONSTANT i32 1
136+
%4:_(s32) = G_CONSTANT i32 9
137+
%5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
138+
%6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
139+
%7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
140+
%8:_(<2 x s32>) = G_LSHR %7, %5(<2 x s32>)
141+
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
142+
$vgpr0 = COPY %9(s32)
143+
$vgpr1 = COPY %10(s32)
144+
SI_RETURN implicit $vgpr0, implicit $vgpr1
145+
146+
...
147+
---
148+
name: combine_vector_shl
149+
tracksRegLiveness: true
150+
body: |
151+
bb.0:
152+
liveins: $vgpr0, $vgpr1, $vgpr31
153+
154+
liveins: $vgpr0, $vgpr1
155+
156+
; CHECK-LABEL: name: combine_vector_shl
157+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
158+
; CHECK-NEXT: {{ $}}
159+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
160+
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
161+
; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
162+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
163+
%0:_(<2 x s32>) = G_IMPLICIT_DEF
164+
%1:_(s32) = G_CONSTANT i32 4294901760
165+
%2:_(s32) = G_CONSTANT i32 0
166+
%3:_(s32) = G_CONSTANT i32 1
167+
%4:_(s32) = G_CONSTANT i32 16
168+
%5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
169+
%6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
170+
%7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
171+
%8:_(<2 x s32>) = G_SHL %7, %5(<2 x s32>)
172+
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
173+
$vgpr0 = COPY %9(s32)
174+
$vgpr1 = COPY %10(s32)
175+
SI_RETURN implicit $vgpr0, implicit $vgpr1
176+
177+
...
178+
---
179+
name: combine_vector_ashr
180+
tracksRegLiveness: true
181+
body: |
182+
bb.0:
183+
liveins: $vgpr0, $vgpr1, $vgpr31
184+
185+
liveins: $vgpr0, $vgpr1
186+
187+
; CHECK-LABEL: name: combine_vector_ashr
188+
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
189+
; CHECK-NEXT: {{ $}}
190+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
191+
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
192+
; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
193+
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
194+
%0:_(<2 x s32>) = G_IMPLICIT_DEF
195+
%1:_(s32) = G_CONSTANT i32 -1
196+
%2:_(s32) = G_CONSTANT i32 0
197+
%3:_(s32) = G_CONSTANT i32 1
198+
%4:_(s32) = G_CONSTANT i32 1
199+
%5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
200+
%6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
201+
%7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
202+
%8:_(<2 x s32>) = G_ASHR %7, %5(<2 x s32>)
203+
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
204+
$vgpr0 = COPY %9(s32)
205+
$vgpr1 = COPY %10(s32)
206+
SI_RETURN implicit $vgpr0, implicit $vgpr1
207+
208+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -374,23 +374,15 @@ body: |
374374
; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
375375
; GFX6: liveins: $vgpr0
376376
; GFX6-NEXT: {{ $}}
377-
; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
378-
; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
379-
; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
380-
; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
381-
; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
382-
; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
377+
; GFX6-NEXT: %6:_(s32) = G_CONSTANT i32 0
378+
; GFX6-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
383379
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
384380
;
385381
; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
386382
; GFX9: liveins: $vgpr0
387383
; GFX9-NEXT: {{ $}}
388-
; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
389-
; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
390-
; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
391-
; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
392-
; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
393-
; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
384+
; GFX9-NEXT: %6:_(s32) = G_CONSTANT i32 0
385+
; GFX9-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
394386
; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
395387
%zero:_(s16) = G_CONSTANT i16 0
396388
%zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16)

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -246,23 +246,15 @@ body: |
246246
; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
247247
; GFX6: liveins: $vgpr0, $vgpr1
248248
; GFX6-NEXT: {{ $}}
249-
; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
250-
; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
251-
; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
252-
; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
253-
; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
254-
; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
249+
; GFX6-NEXT: %6:_(s32) = G_CONSTANT i32 0
250+
; GFX6-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
255251
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
256252
;
257253
; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
258254
; GFX9: liveins: $vgpr0, $vgpr1
259255
; GFX9-NEXT: {{ $}}
260-
; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
261-
; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
262-
; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
263-
; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
264-
; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
265-
; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
256+
; GFX9-NEXT: %6:_(s32) = G_CONSTANT i32 0
257+
; GFX9-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
266258
; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
267259
%zero:_(s16) = G_CONSTANT i16 0
268260
%zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16)

llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1434,13 +1434,11 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
14341434
; SI-LABEL: v_test_sitofp_i64_byte_to_f32:
14351435
; SI: ; %bb.0:
14361436
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1437+
; SI-NEXT: v_ffbh_i32_e32 v2, 0
1438+
; SI-NEXT: v_add_i32_e32 v2, vcc, -1, v2
14371439
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
1438-
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
1439-
; SI-NEXT: v_ffbh_i32_e32 v3, 0
1440-
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
1441-
; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
14421440
; SI-NEXT: v_mov_b32_e32 v1, 0
1443-
; SI-NEXT: v_min_u32_e32 v2, v3, v2
1441+
; SI-NEXT: v_min_u32_e32 v2, 32, v2
14441442
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
14451443
; SI-NEXT: v_min_u32_e32 v0, 1, v0
14461444
; SI-NEXT: v_or_b32_e32 v0, v1, v0
@@ -1452,13 +1450,11 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
14521450
; VI-LABEL: v_test_sitofp_i64_byte_to_f32:
14531451
; VI: ; %bb.0:
14541452
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1453+
; VI-NEXT: v_ffbh_i32_e32 v2, 0
1454+
; VI-NEXT: v_add_u32_e32 v2, vcc, -1, v2
14551455
; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
1456-
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
1457-
; VI-NEXT: v_ffbh_i32_e32 v3, 0
1458-
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
1459-
; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
14601456
; VI-NEXT: v_mov_b32_e32 v1, 0
1461-
; VI-NEXT: v_min_u32_e32 v2, v3, v2
1457+
; VI-NEXT: v_min_u32_e32 v2, 32, v2
14621458
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
14631459
; VI-NEXT: v_min_u32_e32 v0, 1, v0
14641460
; VI-NEXT: v_or_b32_e32 v0, v1, v0

0 commit comments

Comments
 (0)