Skip to content

Commit b546096

Browse files
authored
[VectorCombine] foldShuffleToIdentity - handle bitcasts with equal element counts (#97731)
Basic initial patch for #96884 that just handles the case where we bitcast between float/integer vectors of the same element width.
1 parent e4b2842 commit b546096

File tree

2 files changed

+17
-60
lines changed

2 files changed

+17
-60
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1925,6 +1925,15 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
19251925
} else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
19261926
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
19271927
continue;
1928+
} else if (auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
1929+
// TODO: Handle vector widening/narrowing bitcasts.
1930+
auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
1931+
auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
1932+
if (DstTy && SrcTy &&
1933+
SrcTy->getNumElements() == DstTy->getNumElements()) {
1934+
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
1935+
continue;
1936+
}
19281937
} else if (isa<SelectInst>(FrontU)) {
19291938
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
19301939
Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));

llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll

Lines changed: 8 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,8 @@
1414
define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
1515
; CHECK-LABEL: @x86_pblendvb_v4f64_v2f64(
1616
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
17-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
18-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x double> [[A:%.*]] to <4 x i64>
19-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
20-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[B:%.*]] to <4 x i64>
21-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
22-
; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP5]], <2 x i64> [[TMP3]]
23-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <2 x i32> <i32 2, i32 3>
24-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x double> [[A]] to <4 x i64>
25-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <2 x i32> <i32 2, i32 3>
26-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[B]] to <4 x i64>
27-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> <i32 2, i32 3>
28-
; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[TMP9]]
29-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
30-
; CHECK-NEXT: [[RES:%.*]] = bitcast <4 x i64> [[TMP13]] to <4 x double>
31-
; CHECK-NEXT: ret <4 x double> [[RES]]
17+
; CHECK-NEXT: [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
18+
; CHECK-NEXT: ret <4 x double> [[DOTV]]
3219
;
3320
%a.bc = bitcast <4 x double> %a to <32 x i8>
3421
%b.bc = bitcast <4 x double> %b to <32 x i8>
@@ -51,21 +38,8 @@ define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b,
5138
define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
5239
; CHECK-LABEL: @x86_pblendvb_v8f32_v4f32(
5340
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
54-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
55-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x float> [[A:%.*]] to <8 x i32>
56-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
57-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[B:%.*]] to <8 x i32>
58-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
59-
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP5]], <4 x i32> [[TMP3]]
60-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
61-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
62-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
63-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
64-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
65-
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP11]], <4 x i32> [[TMP9]]
66-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
67-
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP13]] to <8 x float>
68-
; CHECK-NEXT: ret <8 x float> [[RES]]
41+
; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
42+
; CHECK-NEXT: ret <8 x float> [[DOTV]]
6943
;
7044
%a.bc = bitcast <8 x float> %a to <32 x i8>
7145
%b.bc = bitcast <8 x float> %b to <32 x i8>
@@ -228,21 +202,8 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64>
228202
define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
229203
; CHECK-LABEL: @x86_pblendvb_v8f64_v4f64(
230204
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
231-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
232-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x double> [[A:%.*]] to <8 x i64>
233-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
234-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double> [[B:%.*]] to <8 x i64>
235-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
236-
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP5]], <4 x i64> [[TMP3]]
237-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
238-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[A]] to <8 x i64>
239-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP8]], <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
240-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[B]] to <8 x i64>
241-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
242-
; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[TMP9]]
243-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
244-
; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i64> [[TMP13]] to <8 x double>
245-
; CHECK-NEXT: ret <8 x double> [[RES]]
205+
; CHECK-NEXT: [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
206+
; CHECK-NEXT: ret <8 x double> [[DOTV]]
246207
;
247208
%a.bc = bitcast <8 x double> %a to <64 x i8>
248209
%b.bc = bitcast <8 x double> %b to <64 x i8>
@@ -265,21 +226,8 @@ define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b,
265226
define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
266227
; CHECK-LABEL: @x86_pblendvb_v16f32_v8f32(
267228
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
268-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
269-
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32>
270-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
271-
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32>
272-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
273-
; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> [[TMP3]]
274-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
275-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[A]] to <16 x i32>
276-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
277-
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[B]] to <16 x i32>
278-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
279-
; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP11]], <8 x i32> [[TMP9]]
280-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
281-
; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP13]] to <16 x float>
282-
; CHECK-NEXT: ret <16 x float> [[RES]]
229+
; CHECK-NEXT: [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
230+
; CHECK-NEXT: ret <16 x float> [[DOTV]]
283231
;
284232
%a.bc = bitcast <16 x float> %a to <64 x i8>
285233
%b.bc = bitcast <16 x float> %b to <64 x i8>

0 commit comments

Comments
 (0)