Skip to content

Commit 1d06f41

Browse files
authored
[VectorCombine] foldBitcastShuffle - peek through any residual bitcasts before creating a new bitcast on top (#86119)
Encountered while working on #67803, wading through the chains of bitcasts that SSE intrinsics introduce - this patch helps prevent cases where the bitcast chains aren't cleared out and we can't perform further combines until after InstCombine/InstSimplify has run.
1 parent 4624668 commit 1d06f41

File tree

3 files changed

+14
-10
lines changed

3 files changed

+14
-10
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,14 @@ class VectorCombine {
135135
};
136136
} // namespace
137137

138+
/// Return the source operand of a potentially bitcasted value. If there is no
139+
/// bitcast, return the input value itself.
140+
static Value *peekThroughBitcasts(Value *V) {
141+
while (auto *BitCast = dyn_cast<BitCastInst>(V))
142+
V = BitCast->getOperand(0);
143+
return V;
144+
}
145+
138146
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
139147
// Do not widen load if atomic/volatile or under asan/hwasan/memtag/tsan.
140148
// The widened load may load data from dirty regions or create data races
@@ -751,8 +759,8 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) {
751759

752760
// bitcast (shuf V0, V1, MaskC) --> shuf (bitcast V0), (bitcast V1), MaskC'
753761
++NumShufOfBitcast;
754-
Value *CastV0 = Builder.CreateBitCast(V0, NewShuffleTy);
755-
Value *CastV1 = Builder.CreateBitCast(V1, NewShuffleTy);
762+
Value *CastV0 = Builder.CreateBitCast(peekThroughBitcasts(V0), NewShuffleTy);
763+
Value *CastV1 = Builder.CreateBitCast(peekThroughBitcasts(V1), NewShuffleTy);
756764
Value *Shuf = Builder.CreateShuffleVector(CastV0, CastV1, NewMask);
757765
replaceValue(I, *Shuf);
758766
return true;

llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) {
133133
; SSE-NEXT: ret <2 x i64> [[BC3]]
134134
;
135135
; AVX-LABEL: @PR35454_1(
136-
; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
137-
; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8>
136+
; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8>
138137
; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
139138
; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
140139
; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
@@ -164,8 +163,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) {
164163
; SSE-NEXT: ret <2 x i64> [[BC3]]
165164
;
166165
; AVX-LABEL: @PR35454_2(
167-
; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
168-
; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16>
166+
; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
169167
; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
170168
; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
171169
; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>

llvm/test/Transforms/VectorCombine/X86/shuffle.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) {
133133
; SSE-NEXT: ret <2 x i64> [[BC3]]
134134
;
135135
; AVX-LABEL: @PR35454_1(
136-
; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
137-
; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8>
136+
; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8>
138137
; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
139138
; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
140139
; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
@@ -164,8 +163,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) {
164163
; SSE-NEXT: ret <2 x i64> [[BC3]]
165164
;
166165
; AVX-LABEL: @PR35454_2(
167-
; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
168-
; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16>
166+
; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
169167
; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
170168
; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
171169
; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>

0 commit comments

Comments
 (0)