From b9e10359920086ed24ec725b4eab78714b72a9f9 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Fri, 27 Dec 2024 17:49:02 +0900 Subject: [PATCH 1/7] add new tests --- .../X86/extract-insert-poison.ll | 184 ++++++++++++++++++ .../VectorCombine/X86/extract-insert-undef.ll | 184 ++++++++++++++++++ .../VectorCombine/X86/extract-insert.ll | 183 +++++++++++++++++ 3 files changed, 551 insertions(+) create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll create mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert.ll diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll new file mode 100644 index 0000000000000..f9cfe2f516687 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll @@ -0,0 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX + + +define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> poison, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], 
i32 1 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> poison, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> poison, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> poison, double %ext, i32 3 + ret <4 x double> %ins +} + +define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> poison, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = 
insertelement <4 x double> poison, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> poison, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> poison, double %ext, i32 3 + ret <4 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> poison, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> poison, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> 
@src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> poison, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> poison, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> poison, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> poison, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64( +; 
CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> poison, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> poison, double %ext, i32 1 + ret <2 x double> %ins +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX: {{.*}} +; SSE: {{.*}} diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll new file mode 100644 index 0000000000000..c47c196bb92ba --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll @@ -0,0 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX + + +define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> undef, double %ext, i32 0 + ret <4 x 
double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> undef, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> undef, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> undef, double %ext, i32 3 + ret <4 x double> %ins +} + +define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> undef, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; 
CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> undef, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> undef, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> undef, double %ext, i32 3 + ret <4 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> undef, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 +; 
CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> undef, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> undef, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> undef, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> undef, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + 
%ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> undef, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> undef, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> undef, double %ext, i32 1 + ret <2 x double> %ins +} + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; AVX: {{.*}} +; SSE: {{.*}} diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll new file mode 100644 index 0000000000000..66dc9aac6b678 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll @@ -0,0 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX + + +define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> %a, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> %a, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = 
insertelement <4 x double> %a, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 0 + %ins = insertelement <4 x double> %a, double %ext, i32 3 + ret <4 x double> %ins +} + +define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> %a, double %ext, i32 0 + ret <4 x double> %ins +} + +define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> %a, double %ext, i32 1 + ret <4 x double> %ins +} + +define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> %a, double %ext, i32 2 + ret <4 x double> %ins +} + +define <4 x double> 
@src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { +; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3 +; CHECK-NEXT: ret <4 x double> [[INS]] +; + %ext = extractelement <2 x double> %b, i32 1 + %ins = insertelement <4 x double> %a, double %ext, i32 3 + ret <4 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> %a, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> %a, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> %a, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( +; 
CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> %a, double %ext, i32 0 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 0 + %ins = insertelement <2 x double> %a, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 1 + %ins = insertelement <2 x double> %a, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext2_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 2 + %ins = insertelement <2 x double> %a, double %ext, i32 1 + ret <2 x double> %ins +} + +define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { +; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( +; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x 
double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: ret <2 x double> [[INS]] +; + %ext = extractelement <4 x double> %b, i32 3 + %ins = insertelement <2 x double> %a, double %ext, i32 1 + ret <2 x double> %ins +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX: {{.*}} +; SSE: {{.*}} From 7f056fc1c28c29eb0e7123a321a24a79570a2600 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Fri, 27 Dec 2024 17:49:24 +0900 Subject: [PATCH 2/7] [VectorCombine] Allow shuffling between vectors of the same element type but different element counts The `foldInsExtVectorToShuffle` function folds an extractelement from one vector followed by an insertelement into another vector into a single shuffle. Previously, this fold was only supported when both vectors had exactly the same type. This commit also allows the fold when the two vectors share an element type but have different numbers of elements, by first converting the source vector to the destination length with a length-changing shuffle. Proof: https://alive2.llvm.org/ce/z/EWFmfL Fixes #120772 --- .../Transforms/Vectorize/VectorCombine.cpp | 54 +++++++++++++---- .../X86/extract-insert-poison.ll | 58 +++++++++++-------- .../VectorCombine/X86/extract-insert-undef.ll | 28 ++++----- .../VectorCombine/X86/extract-insert.ll | 54 +++++++++-------- .../VectorCombine/X86/load-inseltpoison.ll | 4 +- 5 files changed, 127 insertions(+), 71 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index ea53a1acebd1d..0387898996732 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3068,24 +3068,37 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { return false; auto *VecTy = dyn_cast(I.getType()); - if (!VecTy || SrcVec->getType() != VecTy) + auto *SrcVecTy = dyn_cast(SrcVec->getType()); + // We can try combining vectors with different element sizes. 
+ if (!VecTy || !SrcVecTy || + SrcVecTy->getElementType() != VecTy->getElementType()) return false; unsigned NumElts = VecTy->getNumElements(); - if (ExtIdx >= NumElts || InsIdx >= NumElts) + unsigned NumSrcElts = SrcVecTy->getNumElements(); + if (InsIdx >= NumElts || NumElts == 1) return false; // Insertion into poison is a cheaper single operand shuffle. TargetTransformInfo::ShuffleKind SK; SmallVector Mask(NumElts, PoisonMaskElem); - if (isa(DstVec) && !isa(SrcVec)) { + + bool NeedExpOrNarrow = NumSrcElts != NumElts; + bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec); + if (NeedDstSrcSwap) { SK = TargetTransformInfo::SK_PermuteSingleSrc; - Mask[InsIdx] = ExtIdx; + if (!NeedExpOrNarrow) + Mask[InsIdx] = ExtIdx; + else + Mask[InsIdx] = 0; std::swap(DstVec, SrcVec); } else { SK = TargetTransformInfo::SK_PermuteTwoSrc; std::iota(Mask.begin(), Mask.end(), 0); - Mask[InsIdx] = ExtIdx + NumElts; + if (!NeedExpOrNarrow) + Mask[InsIdx] = ExtIdx + NumElts; + else + Mask[InsIdx] = NumElts; } // Cost @@ -3097,12 +3110,26 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx); InstructionCost OldCost = ExtCost + InsCost; - // Ignore 'free' identity insertion shuffle. - // TODO: getShuffleCost should return TCC_Free for Identity shuffles. InstructionCost NewCost = 0; - if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts)) - NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr, - {DstVec, SrcVec}); + SmallVector ExtToVecMask; + if (!NeedExpOrNarrow) { + // Ignore 'free' identity insertion shuffle. + // TODO: getShuffleCost should return TCC_Free for Identity shuffles. 
+ if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts)) + NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr, + {DstVec, SrcVec}); + } else { + // When creating length-changing-vector, always create with a Mask whose + // first element has an ExtIdx, so that the first element of the vector + // being created is always the target to be extracted. + ExtToVecMask.assign(NumElts, PoisonMaskElem); + ExtToVecMask[0] = ExtIdx; + // Add cost for expanding or narrowing + NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, + VecTy, ExtToVecMask, CostKind); + NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind); + } + if (!Ext->hasOneUse()) NewCost += ExtCost; @@ -3113,6 +3140,13 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { if (OldCost < NewCost) return false; + if (NeedExpOrNarrow) { + if (!NeedDstSrcSwap) + SrcVec = Builder.CreateShuffleVector(SrcVec, ExtToVecMask); + else + DstVec = Builder.CreateShuffleVector(DstVec, ExtToVecMask); + } + // Canonicalize undef param to RHS to help further folds. 
if (isa(DstVec) && !isa(SrcVec)) { ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll index f9cfe2f516687..dc0ca3dacbb26 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll @@ -26,10 +26,15 @@ define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2 -; CHECK-NEXT: ret <4 x double> [[INS]] +; SSE-LABEL: @src_ins2_v4f64_ext0_v2f64( +; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 +; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2 +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins2_v4f64_ext0_v2f64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 0 %ins = insertelement <4 x double> poison, double %ext, i32 2 @@ -37,10 +42,15 @@ define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3 -; CHECK-NEXT: ret <4 x double> [[INS]] +; SSE-LABEL: @src_ins3_v4f64_ext0_v2f64( +; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> 
[[B:%.*]], i32 0 +; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3 +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins3_v4f64_ext0_v2f64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 0 %ins = insertelement <4 x double> poison, double %ext, i32 3 @@ -60,8 +70,8 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -70,10 +80,15 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2 -; CHECK-NEXT: ret <4 x double> [[INS]] +; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64( +; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2 +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x 
i32> +; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 %ins = insertelement <4 x double> poison, double %ext, i32 2 @@ -82,8 +97,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -148,8 +163,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 1 @@ -170,8 +185,8 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> poison, double [[EXT]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] 
= shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 3 @@ -179,6 +194,3 @@ define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) ret <2 x double> %ins } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; AVX: {{.*}} -; SSE: {{.*}} diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll index c47c196bb92ba..6051e6ff512fe 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll @@ -59,10 +59,15 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1 -; CHECK-NEXT: ret <4 x double> [[INS]] +; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64( +; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1 +; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 %ins = insertelement <4 x double> undef, double %ext, i32 1 @@ -82,8 +87,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> 
@src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -148,8 +153,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 1 @@ -170,8 +175,8 @@ define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 3 @@ -179,6 +184,3 @@ define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) ret <2 x 
double> %ins } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; AVX: {{.*}} -; SSE: {{.*}} diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll index 66dc9aac6b678..31c4834ff6584 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll @@ -48,10 +48,15 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0 -; CHECK-NEXT: ret <4 x double> [[INS]] +; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64( +; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 0 +; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 %ins = insertelement <4 x double> %a, double %ext, i32 0 @@ -59,10 +64,15 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1 -; CHECK-NEXT: ret <4 x double> [[INS]] +; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64( +; SSE-NEXT: 
[[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64( +; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1 +; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 %ins = insertelement <4 x double> %a, double %ext, i32 1 @@ -71,8 +81,8 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -82,8 +92,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -104,8 +114,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> 
%a, <4 x double> %b) define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 1 @@ -126,8 +136,8 @@ define <2 x double> @src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 3 @@ -148,8 +158,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 1 @@ -170,14 +180,12 @@ define <2 x double> 
@src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> [[A:%.*]], double [[EXT]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 3 %ins = insertelement <2 x double> %a, double %ext, i32 1 ret <2 x double> %ins } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; AVX: {{.*}} -; SSE: {{.*}} + diff --git a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll index 2db1e21b3e95a..3b2aa5e6603b0 100644 --- a/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll @@ -589,8 +589,8 @@ define <8 x i32> @load_v1i32_extract_insert_v8i32_extra_use(ptr align 16 derefer ; CHECK-LABEL: @load_v1i32_extract_insert_v8i32_extra_use( ; CHECK-NEXT: [[L:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 ; CHECK-NEXT: store <1 x i32> [[L]], ptr [[STORE_PTR:%.*]], align 4 -; CHECK-NEXT: [[S:%.*]] = extractelement <1 x i32> [[L]], i32 0 -; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i32> poison, i32 [[S]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <1 x i32> [[L]], <1 x i32> poison, <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %l = load <1 x i32>, ptr %p, align 4 From 687d1c921fb6dc08ec902a694845587252d29231 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Tue, 7 Jan 2025 01:40:14 +0900 Subject: [PATCH 3/7] Add the 
omitted Dst to the variable name --- .../Transforms/Vectorize/VectorCombine.cpp | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 0387898996732..89477246e0ec9 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3067,23 +3067,23 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { m_ConstantInt(InsIdx)))) return false; - auto *VecTy = dyn_cast(I.getType()); + auto *DstVecTy = dyn_cast(I.getType()); auto *SrcVecTy = dyn_cast(SrcVec->getType()); // We can try combining vectors with different element sizes. - if (!VecTy || !SrcVecTy || - SrcVecTy->getElementType() != VecTy->getElementType()) + if (!DstVecTy || !SrcVecTy || + SrcVecTy->getElementType() != DstVecTy->getElementType()) return false; - unsigned NumElts = VecTy->getNumElements(); + unsigned NumDstElts = DstVecTy->getNumElements(); unsigned NumSrcElts = SrcVecTy->getNumElements(); - if (InsIdx >= NumElts || NumElts == 1) + if (InsIdx >= NumDstElts || NumDstElts == 1) return false; // Insertion into poison is a cheaper single operand shuffle. 
TargetTransformInfo::ShuffleKind SK; - SmallVector Mask(NumElts, PoisonMaskElem); + SmallVector Mask(NumDstElts, PoisonMaskElem); - bool NeedExpOrNarrow = NumSrcElts != NumElts; + bool NeedExpOrNarrow = NumSrcElts != NumDstElts; bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec); if (NeedDstSrcSwap) { SK = TargetTransformInfo::SK_PermuteSingleSrc; @@ -3096,18 +3096,18 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { SK = TargetTransformInfo::SK_PermuteTwoSrc; std::iota(Mask.begin(), Mask.end(), 0); if (!NeedExpOrNarrow) - Mask[InsIdx] = ExtIdx + NumElts; + Mask[InsIdx] = ExtIdx + NumDstElts; else - Mask[InsIdx] = NumElts; + Mask[InsIdx] = NumDstElts; } // Cost auto *Ins = cast(&I); auto *Ext = cast(I.getOperand(1)); InstructionCost InsCost = - TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx); + TTI.getVectorInstrCost(*Ins, DstVecTy, CostKind, InsIdx); InstructionCost ExtCost = - TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx); + TTI.getVectorInstrCost(*Ext, DstVecTy, CostKind, ExtIdx); InstructionCost OldCost = ExtCost + InsCost; InstructionCost NewCost = 0; @@ -3115,19 +3115,19 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { if (!NeedExpOrNarrow) { // Ignore 'free' identity insertion shuffle. // TODO: getShuffleCost should return TCC_Free for Identity shuffles. - if (!ShuffleVectorInst::isIdentityMask(Mask, NumElts)) - NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind, 0, nullptr, + if (!ShuffleVectorInst::isIdentityMask(Mask, NumDstElts)) + NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {DstVec, SrcVec}); } else { // When creating length-changing-vector, always create with a Mask whose // first element has an ExtIdx, so that the first element of the vector // being created is always the target to be extracted. 
- ExtToVecMask.assign(NumElts, PoisonMaskElem); + ExtToVecMask.assign(NumDstElts, PoisonMaskElem); ExtToVecMask[0] = ExtIdx; // Add cost for expanding or narrowing NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, - VecTy, ExtToVecMask, CostKind); - NewCost += TTI.getShuffleCost(SK, VecTy, Mask, CostKind); + DstVecTy, ExtToVecMask, CostKind); + NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind); } if (!Ext->hasOneUse()) @@ -3149,7 +3149,7 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { // Canonicalize undef param to RHS to help further folds. if (isa(DstVec) && !isa(SrcVec)) { - ShuffleVectorInst::commuteShuffleMask(Mask, NumElts); + ShuffleVectorInst::commuteShuffleMask(Mask, NumDstElts); std::swap(DstVec, SrcVec); } From 6a04dfe8005ad20e6ca750928964d579e4c694ec Mon Sep 17 00:00:00 2001 From: hanbeom Date: Tue, 7 Jan 2025 01:43:46 +0900 Subject: [PATCH 4/7] Returns false early if condition 'ExtIdx>=NumSrcElts' is met --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 89477246e0ec9..12555588ed47f 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3076,7 +3076,7 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { unsigned NumDstElts = DstVecTy->getNumElements(); unsigned NumSrcElts = SrcVecTy->getNumElements(); - if (InsIdx >= NumDstElts || NumDstElts == 1) + if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1) return false; // Insertion into poison is a cheaper single operand shuffle. 
From 824c68ec9d359bc358534b61178ef4040b2c19f4 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Sat, 18 Jan 2025 21:37:58 +0900 Subject: [PATCH 5/7] remove test cases for undef --- .../VectorCombine/X86/extract-insert-undef.ll | 186 ------------------ 1 file changed, 186 deletions(-) delete mode 100644 llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll deleted file mode 100644 index 6051e6ff512fe..0000000000000 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-undef.ll +++ /dev/null @@ -1,186 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX - - -define <4 x double> @src_ins0_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins0_v4f64_ext0_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0 -; CHECK-NEXT: ret <4 x double> [[INS]] -; - %ext = extractelement <2 x double> %b, i32 0 - %ins = insertelement <4 x double> undef, double %ext, i32 0 - ret <4 x double> %ins -} - -define <4 x double> @src_ins1_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins1_v4f64_ext0_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1 -; CHECK-NEXT: ret <4 x double> [[INS]] -; - %ext = extractelement <2 x double> %b, i32 0 - %ins = insertelement <4 x double> undef, double %ext, i32 1 - ret <4 x double> %ins -} - -define <4 x double> @src_ins2_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 
{ -; CHECK-LABEL: @src_ins2_v4f64_ext0_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2 -; CHECK-NEXT: ret <4 x double> [[INS]] -; - %ext = extractelement <2 x double> %b, i32 0 - %ins = insertelement <4 x double> undef, double %ext, i32 2 - ret <4 x double> %ins -} - -define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins3_v4f64_ext0_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 0 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 3 -; CHECK-NEXT: ret <4 x double> [[INS]] -; - %ext = extractelement <2 x double> %b, i32 0 - %ins = insertelement <4 x double> undef, double %ext, i32 3 - ret <4 x double> %ins -} - -define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 0 -; CHECK-NEXT: ret <4 x double> [[INS]] -; - %ext = extractelement <2 x double> %b, i32 1 - %ins = insertelement <4 x double> undef, double %ext, i32 0 - ret <4 x double> %ins -} - -define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> -; SSE-NEXT: ret <4 x double> [[INS]] -; -; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64( -; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 1 -; AVX-NEXT: ret <4 x double> [[INS]] -; - %ext = extractelement <2 x double> %b, i32 1 - %ins = insertelement <4 
x double> undef, double %ext, i32 1 - ret <4 x double> %ins -} - -define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> undef, double [[EXT]], i32 2 -; CHECK-NEXT: ret <4 x double> [[INS]] -; - %ext = extractelement <2 x double> %b, i32 1 - %ins = insertelement <4 x double> undef, double %ext, i32 2 - ret <4 x double> %ins -} - -define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> undef, <4 x i32> -; CHECK-NEXT: ret <4 x double> [[INS]] -; - %ext = extractelement <2 x double> %b, i32 1 - %ins = insertelement <4 x double> undef, double %ext, i32 3 - ret <4 x double> %ins -} - -define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @src_ins0_v2f64_ext0_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0 -; CHECK-NEXT: ret <2 x double> [[INS]] -; - %ext = extractelement <4 x double> %b, i32 0 - %ins = insertelement <2 x double> undef, double %ext, i32 0 - ret <2 x double> %ins -} - -define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0 -; CHECK-NEXT: ret <2 x double> [[INS]] -; - %ext = extractelement <4 x double> %b, i32 1 - %ins = insertelement <2 x double> undef, double %ext, i32 0 - ret <2 x double> %ins -} - -define <2 x double> 
@src_ins0_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @src_ins0_v2f64_ext2_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0 -; CHECK-NEXT: ret <2 x double> [[INS]] -; - %ext = extractelement <4 x double> %b, i32 2 - %ins = insertelement <2 x double> undef, double %ext, i32 0 - ret <2 x double> %ins -} - -define <2 x double> @src_ins0_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @src_ins0_v2f64_ext3_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 3 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 0 -; CHECK-NEXT: ret <2 x double> [[INS]] -; - %ext = extractelement <4 x double> %b, i32 3 - %ins = insertelement <2 x double> undef, double %ext, i32 0 - ret <2 x double> %ins -} - -define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @src_ins1_v2f64_ext0_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 0 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1 -; CHECK-NEXT: ret <2 x double> [[INS]] -; - %ext = extractelement <4 x double> %b, i32 0 - %ins = insertelement <2 x double> undef, double %ext, i32 1 - ret <2 x double> %ins -} - -define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: ret <2 x double> [[INS]] -; - %ext = extractelement <4 x double> %b, i32 1 - %ins = insertelement <2 x double> undef, double %ext, i32 1 - ret <2 x double> %ins -} - -define <2 x double> @src_ins1_v2f64_ext2_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: 
@src_ins1_v2f64_ext2_v4f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x double> [[B:%.*]], i32 2 -; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[EXT]], i32 1 -; CHECK-NEXT: ret <2 x double> [[INS]] -; - %ext = extractelement <4 x double> %b, i32 2 - %ins = insertelement <2 x double> undef, double %ext, i32 1 - ret <2 x double> %ins -} - -define <2 x double> @src_ins1_v2f64_ext3_v4f64(<2 x double> %a, <4 x double> %b) { -; CHECK-LABEL: @src_ins1_v2f64_ext3_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> undef, <2 x i32> -; CHECK-NEXT: ret <2 x double> [[INS]] -; - %ext = extractelement <4 x double> %b, i32 3 - %ins = insertelement <2 x double> undef, double %ext, i32 1 - ret <2 x double> %ins -} - From e6525d911765e444dd16c9219d70afb72ba5e474 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Tue, 21 Jan 2025 04:01:20 +0900 Subject: [PATCH 6/7] keep extidx if it within dest vector bound --- .../Transforms/Vectorize/VectorCombine.cpp | 18 +++++---- .../X86/extract-insert-poison.ll | 38 +++++++++---------- .../VectorCombine/X86/extract-insert.ll | 33 +++++++--------- 3 files changed, 44 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 12555588ed47f..1e1ecba84fea0 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3084,21 +3084,22 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { SmallVector Mask(NumDstElts, PoisonMaskElem); bool NeedExpOrNarrow = NumSrcElts != NumDstElts; + bool IsExtIdxInBounds = ExtIdx < NumDstElts; bool NeedDstSrcSwap = isa(DstVec) && !isa(SrcVec); if (NeedDstSrcSwap) { SK = TargetTransformInfo::SK_PermuteSingleSrc; - if (!NeedExpOrNarrow) - Mask[InsIdx] = ExtIdx; - else + if (!IsExtIdxInBounds && 
NeedExpOrNarrow) Mask[InsIdx] = 0; + else + Mask[InsIdx] = ExtIdx; std::swap(DstVec, SrcVec); } else { SK = TargetTransformInfo::SK_PermuteTwoSrc; std::iota(Mask.begin(), Mask.end(), 0); - if (!NeedExpOrNarrow) - Mask[InsIdx] = ExtIdx + NumDstElts; - else + if (!IsExtIdxInBounds && NeedExpOrNarrow) Mask[InsIdx] = NumDstElts; + else + Mask[InsIdx] = ExtIdx + NumDstElts; } // Cost @@ -3123,7 +3124,10 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { // first element has an ExtIdx, so that the first element of the vector // being created is always the target to be extracted. ExtToVecMask.assign(NumDstElts, PoisonMaskElem); - ExtToVecMask[0] = ExtIdx; + if (IsExtIdxInBounds) + ExtToVecMask[ExtIdx] = ExtIdx; + else + ExtToVecMask[0] = ExtIdx; // Add cost for expanding or narrowing NewCost = TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, DstVecTy, ExtToVecMask, CostKind); diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll index dc0ca3dacbb26..0a9386c0b8db1 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll @@ -58,10 +58,15 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; CHECK-LABEL: @src_ins0_v4f64_ext1_v2f64( -; CHECK-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0 -; CHECK-NEXT: ret <4 x double> [[INS]] +; SSE-LABEL: @src_ins0_v4f64_ext1_v2f64( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; SSE-NEXT: ret <4 x double> [[INS]] +; +; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64( +; AVX-NEXT: 
[[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 +; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 0 +; AVX-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 %ins = insertelement <4 x double> poison, double %ext, i32 0 @@ -70,8 +75,8 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -80,15 +85,10 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; SSE-LABEL: @src_ins2_v4f64_ext1_v2f64( -; SSE-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; SSE-NEXT: [[INS:%.*]] = insertelement <4 x double> poison, double [[EXT]], i32 2 -; SSE-NEXT: ret <4 x double> [[INS]] -; -; AVX-LABEL: @src_ins2_v4f64_ext1_v2f64( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; AVX-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> -; AVX-NEXT: ret <4 x double> [[INS]] +; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement 
<2 x double> %b, i32 1 %ins = insertelement <4 x double> poison, double %ext, i32 2 @@ -97,8 +97,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -163,8 +163,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 1 diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll index 31c4834ff6584..41200e517f5ed 100644 --- a/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll +++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert.ll @@ -49,8 +49,8 @@ define <4 x double> @src_ins3_v4f64_ext0_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; SSE-LABEL: 
@src_ins0_v4f64_ext1_v2f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> ; SSE-NEXT: ret <4 x double> [[INS]] ; ; AVX-LABEL: @src_ins0_v4f64_ext1_v2f64( @@ -64,15 +64,10 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) } define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { -; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> -; SSE-NEXT: ret <4 x double> [[INS]] -; -; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64( -; AVX-NEXT: [[EXT:%.*]] = extractelement <2 x double> [[B:%.*]], i32 1 -; AVX-NEXT: [[INS:%.*]] = insertelement <4 x double> [[A:%.*]], double [[EXT]], i32 1 -; AVX-NEXT: ret <4 x double> [[INS]] +; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 %ins = insertelement <4 x double> %a, double %ext, i32 1 @@ -81,8 +76,8 @@ define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins2_v4f64_ext1_v2f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> 
[[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -92,8 +87,8 @@ define <4 x double> @src_ins2_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) define <4 x double> @src_ins3_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 { ; CHECK-LABEL: @src_ins3_v4f64_ext1_v2f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[TMP1]], <4 x i32> ; CHECK-NEXT: ret <4 x double> [[INS]] ; %ext = extractelement <2 x double> %b, i32 1 @@ -114,8 +109,8 @@ define <2 x double> @src_ins0_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins0_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins0_v2f64_ext1_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 1 @@ -158,8 +153,8 @@ define <2 x double> @src_ins1_v2f64_ext0_v4f64(<2 x double> %a, <4 x double> %b) define <2 x double> @src_ins1_v2f64_ext1_v4f64(<2 x double> %a, <4 x double> %b) { ; CHECK-LABEL: @src_ins1_v2f64_ext1_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector 
<4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> -; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[B:%.*]], <4 x double> poison, <2 x i32> +; CHECK-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[A:%.*]], <2 x double> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x double> [[INS]] ; %ext = extractelement <4 x double> %b, i32 1 From cb69be089b147ce42e483f8560f28f713a19aa09 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Fri, 31 Jan 2025 05:38:31 +0900 Subject: [PATCH 7/7] fix wrong argument for isIdentityMask --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 1e1ecba84fea0..a28401bd84930 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -3116,7 +3116,7 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) { if (!NeedExpOrNarrow) { // Ignore 'free' identity insertion shuffle. // TODO: getShuffleCost should return TCC_Free for Identity shuffles. - if (!ShuffleVectorInst::isIdentityMask(Mask, NumDstElts)) + if (!ShuffleVectorInst::isIdentityMask(Mask, NumSrcElts)) NewCost += TTI.getShuffleCost(SK, DstVecTy, Mask, CostKind, 0, nullptr, {DstVec, SrcVec}); } else {