[VectorCombine] foldShuffleToIdentity - handle bitcasts with equal element counts #97731

RKSimon · 2024-07-04T13:56:31Z

Basic initial patch for #96884 that just handles case where we bitcast between float/intgers of the same element width

llvmbot · 2024-07-04T13:57:03Z

@llvm/pr-subscribers-llvm-transforms

Author: Simon Pilgrim (RKSimon)

Changes

Basic initial patch for #96884 that just handles case where we bitcast between float/intgers of the same element width

Full diff: https://github.com/llvm/llvm-project/pull/97731.diff

2 Files Affected:

(modified) llvm/lib/Transforms/Vectorize/VectorCombine.cpp (+8)
(modified) llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll (+8-60)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 8c1337cabb42f..fa475d945c67d 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1925,6 +1925,14 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
       } else if (isa<UnaryOperator, TruncInst, ZExtInst, SExtInst>(FrontU)) {
         Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
         continue;
+      } else if (auto *BitCast = dyn_cast<BitCastInst>(FrontU)) {
+        // TODO: Handle vector widening/narrowing bitcasts.
+        auto *DstTy = dyn_cast<FixedVectorType>(BitCast->getDestTy());
+        auto *SrcTy = dyn_cast<FixedVectorType>(BitCast->getSrcTy());
+        if (DstTy && SrcTy && SrcTy->getNumElements() == DstTy->getNumElements()) {
+          Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
+          continue;
+        }
       } else if (isa<SelectInst>(FrontU)) {
         Worklist.push_back(generateInstLaneVectorFromOperand(Item, 0));
         Worklist.push_back(generateInstLaneVectorFromOperand(Item, 1));
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
index 67c9c333987f6..75bef838aefd5 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
@@ -14,21 +14,8 @@
 define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d) {
 ; CHECK-LABEL: @x86_pblendvb_v4f64_v2f64(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x double> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x double> [[A:%.*]] to <4 x i64>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x double> [[B:%.*]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP5]], <2 x i64> [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x double> [[A]] to <4 x i64>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x double> [[B]] to <4 x i64>
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP12:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[TMP9]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP12]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[RES:%.*]] = bitcast <4 x i64> [[TMP13]] to <4 x double>
-; CHECK-NEXT:    ret <4 x double> [[RES]]
+; CHECK-NEXT:    [[DOTV:%.*]] = select <4 x i1> [[CMP]], <4 x double> [[B:%.*]], <4 x double> [[A:%.*]]
+; CHECK-NEXT:    ret <4 x double> [[DOTV]]
 ;
   %a.bc = bitcast <4 x double> %a to <32 x i8>
   %b.bc = bitcast <4 x double> %b to <32 x i8>
@@ -51,21 +38,8 @@ define <4 x double> @x86_pblendvb_v4f64_v2f64(<4 x double> %a, <4 x double> %b,
 define <8 x float> @x86_pblendvb_v8f32_v4f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d) {
 ; CHECK-LABEL: @x86_pblendvb_v8f32_v4f32(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <8 x float> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x float> [[A:%.*]] to <8 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x float> [[B:%.*]] to <8 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP5]], <4 x i32> [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x float> [[A]] to <8 x i32>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x float> [[B]] to <8 x i32>
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP11]], <4 x i32> [[TMP9]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[RES:%.*]] = bitcast <8 x i32> [[TMP13]] to <8 x float>
-; CHECK-NEXT:    ret <8 x float> [[RES]]
+; CHECK-NEXT:    [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x float> [[B:%.*]], <8 x float> [[A:%.*]]
+; CHECK-NEXT:    ret <8 x float> [[DOTV]]
 ;
   %a.bc = bitcast <8 x float> %a to <32 x i8>
   %b.bc = bitcast <8 x float> %b to <32 x i8>
@@ -228,21 +202,8 @@ define <4 x i64> @x86_pblendvb_v32i8_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64>
 define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d) {
 ; CHECK-LABEL: @x86_pblendvb_v8f64_v4f64(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <8 x double> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x double> [[A:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x double> [[B:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP5]], <4 x i64> [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x double> [[A]] to <8 x i64>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i64> [[TMP8]], <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x double> [[B]] to <8 x i64>
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[TMP9]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[RES:%.*]] = bitcast <8 x i64> [[TMP13]] to <8 x double>
-; CHECK-NEXT:    ret <8 x double> [[RES]]
+; CHECK-NEXT:    [[DOTV:%.*]] = select <8 x i1> [[CMP]], <8 x double> [[B:%.*]], <8 x double> [[A:%.*]]
+; CHECK-NEXT:    ret <8 x double> [[DOTV]]
 ;
   %a.bc = bitcast <8 x double> %a to <64 x i8>
   %b.bc = bitcast <8 x double> %b to <64 x i8>
@@ -265,21 +226,8 @@ define <8 x double> @x86_pblendvb_v8f64_v4f64(<8 x double> %a, <8 x double> %b,
 define <16 x float> @x86_pblendvb_v16f32_v8f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d) {
 ; CHECK-LABEL: @x86_pblendvb_v16f32_v8f32(
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <16 x float> [[C:%.*]], [[D:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x float> [[A:%.*]] to <16 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x float> [[B:%.*]] to <16 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x float> [[A]] to <16 x i32>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x float> [[B]] to <16 x i32>
-; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP11]], <8 x i32> [[TMP9]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[RES:%.*]] = bitcast <16 x i32> [[TMP13]] to <16 x float>
-; CHECK-NEXT:    ret <16 x float> [[RES]]
+; CHECK-NEXT:    [[DOTV:%.*]] = select <16 x i1> [[CMP]], <16 x float> [[B:%.*]], <16 x float> [[A:%.*]]
+; CHECK-NEXT:    ret <16 x float> [[DOTV]]
 ;
   %a.bc = bitcast <16 x float> %a to <64 x i8>
   %b.bc = bitcast <16 x float> %b to <64 x i8>

github-actions · 2024-07-04T14:00:11Z

✅ With the latest revision this PR passed the C/C++ code formatter.

…ement counts Basic initial patch for llvm#96884 that just handles case where we bitcast between float/intgers of the same element width

davemgreen

With the same number of elements this sounds nice and easy. LGTM, thanks.

RKSimon · 2024-07-05T07:30:06Z

Thanks, do you have any preference on how best to extend this?

…ement counts (llvm#97731) Basic initial patch for llvm#96884 that just handles case where we bitcast between float/integers of the same element width

davemgreen · 2024-07-08T06:54:37Z

Thanks, do you have any preference on how best to extend this?

Hi - I haven't thought about them too much. Maybe they can be treated a bit like shuffles?

bgra8 · 2024-07-08T16:19:02Z

@RKSimon early heads up that this is causing test failures for open source https://github.com/google/highway for test hwy/tests/sums_abs_diff_test.cc

We are working on a reproducer, a revert would be great in the mean time.

RKSimon · 2024-07-08T20:09:33Z

I'm AFK until tomorrow now, but if you can post the regression I'll take a look as soon as I can.

RKSimon · 2024-07-09T08:38:41Z

@bgra8 Any luck with that regression test please?

bgra8 · 2024-07-09T09:10:44Z

@RKSimon I was busy with other blockers and the day is just starting in my side of the world. I'm resuming work on this one now.

davemgreen · 2024-07-09T09:52:36Z

It seems to be this case, which is not reduced more than extracting the individual function: https://godbolt.org/z/xxMoM37oj

I haven't analyzed very much, but it might need to check that each of the bitcasts is the same size/type, not just the first.

RKSimon · 2024-07-09T12:00:41Z

llvm-reduce came up with:

define void @_ZN3hwy7N_SSSE325TestLoadStoreInterleaved3clIiNS0_4SimdIiLm4ELi0EEEEEvT_T0_() {
entry:
  %shuffle.i.i = shufflevector <2 x i64> zeroinitializer, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
  %0 = bitcast <4 x i32> zeroinitializer to <4 x float>
  %1 = bitcast <2 x i64> %shuffle.i.i to <4 x float>
  %shufp.i196 = shufflevector <4 x float> %0, <4 x float> %1, <4 x i32> <i32 2, i32 1, i32 4, i32 7>
  store <4 x float> %shufp.i196, ptr null, align 16
  ret void
}

fix incoming

RKSimon · 2024-07-09T12:11:49Z

@bgra8 Please can you confirm ef5b1ec fixes this?

bgra8 · 2024-07-09T12:52:53Z

@RKSimon I confirm ef5b1ec fixes my test failure. Thanks a lot for the quick fix!

RKSimon requested a review from davemgreen July 4, 2024 13:56

llvmbot added vectorizers llvm:transforms labels Jul 4, 2024

RKSimon added 2 commits July 4, 2024 15:12

[VectorCombine] foldShuffleToIdentity - handle bitcasts with equal el…

96340c5

…ement counts Basic initial patch for llvm#96884 that just handles case where we bitcast between float/intgers of the same element width

Fix formatting

9e8043b

RKSimon force-pushed the identity-bitcasts branch from e776e81 to 9e8043b Compare July 4, 2024 14:13

davemgreen approved these changes Jul 5, 2024

View reviewed changes

RKSimon merged commit b546096 into llvm:main Jul 5, 2024
7 checks passed

RKSimon deleted the identity-bitcasts branch July 5, 2024 08:47

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[VectorCombine] foldShuffleToIdentity - handle bitcasts with equal element counts #97731

[VectorCombine] foldShuffleToIdentity - handle bitcasts with equal element counts #97731

Uh oh!

RKSimon commented Jul 4, 2024

Uh oh!

llvmbot commented Jul 4, 2024

Uh oh!

github-actions bot commented Jul 4, 2024 •

edited

Loading

Uh oh!

davemgreen left a comment

Uh oh!

RKSimon commented Jul 5, 2024

Uh oh!

Uh oh!

davemgreen commented Jul 8, 2024

Uh oh!

bgra8 commented Jul 8, 2024

Uh oh!

RKSimon commented Jul 8, 2024

Uh oh!

RKSimon commented Jul 9, 2024

Uh oh!

bgra8 commented Jul 9, 2024

Uh oh!

davemgreen commented Jul 9, 2024 •

edited

Loading

Uh oh!

RKSimon commented Jul 9, 2024 •

edited

Loading

Uh oh!

RKSimon commented Jul 9, 2024

Uh oh!

bgra8 commented Jul 9, 2024

Uh oh!

Uh oh!

[VectorCombine] foldShuffleToIdentity - handle bitcasts with equal element counts #97731

[VectorCombine] foldShuffleToIdentity - handle bitcasts with equal element counts #97731

Uh oh!

Conversation

RKSimon commented Jul 4, 2024

Uh oh!

llvmbot commented Jul 4, 2024

Uh oh!

github-actions bot commented Jul 4, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

davemgreen left a comment

Choose a reason for hiding this comment

Uh oh!

RKSimon commented Jul 5, 2024

Uh oh!

Uh oh!

davemgreen commented Jul 8, 2024

Uh oh!

bgra8 commented Jul 8, 2024

Uh oh!

RKSimon commented Jul 8, 2024

Uh oh!

RKSimon commented Jul 9, 2024

Uh oh!

bgra8 commented Jul 9, 2024

Uh oh!

davemgreen commented Jul 9, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

RKSimon commented Jul 9, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

RKSimon commented Jul 9, 2024

Uh oh!

bgra8 commented Jul 9, 2024

Uh oh!

Uh oh!

github-actions bot commented Jul 4, 2024 •

edited

Loading

davemgreen commented Jul 9, 2024 •

edited

Loading

RKSimon commented Jul 9, 2024 •

edited

Loading