diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index aa1a9d7be9cc..08073c5bca5f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -781,8 +781,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_blendps256: case X86::BI__builtin_ia32_pblendw256: case X86::BI__builtin_ia32_pblendd128: - case X86::BI__builtin_ia32_pblendd256: - llvm_unreachable("pblendd128 NYI"); + case X86::BI__builtin_ia32_pblendd256: { + unsigned numElts = cast<cir::VectorType>(Ops[0].getType()).getSize(); + unsigned imm = + Ops[2].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue(); + + int64_t indices[16]; + // If there are more than 8 elements, the immediate is used twice so make + // sure we handle that. + for (unsigned i = 0; i != numElts; ++i) + indices[i] = ((imm >> (i % 8)) & 0x1) ? numElts + i : i; + + return builder.createVecShuffle(getLoc(E->getExprLoc()), Ops[0], Ops[1], + ArrayRef(indices, numElts)); + } case X86::BI__builtin_ia32_pshuflw: case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: diff --git a/clang/test/CIR/CodeGen/X86/avx-builtins.c b/clang/test/CIR/CodeGen/X86/avx-builtins.c index aca20c305d65..14cf5afa889c 100644 --- a/clang/test/CIR/CodeGen/X86/avx-builtins.c +++ b/clang/test/CIR/CodeGen/X86/avx-builtins.c @@ -18,6 +18,11 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror // RUN: FileCheck --check-prefixes=LLVM-CHECK,LLVM-X64 --input-file=%t.ll %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG + // This test mimics clang/test/CodeGen/X86/avx-builtins.c, which eventually // CIR shall be able to support fully. @@ -143,3 +148,27 @@ __m256i test_mm256_insert_epi64(__m256i x, long long b) { return _mm256_insert_epi64(x, b, 2); } #endif + +__m256d test_mm256_blend_pd(__m256d A, __m256d B) { + // CIR-LABEL: test_mm256_blend_pd + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 4>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 4> + + // LLVM-LABEL: test_mm256_blend_pd + // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3> + + // OGCG-LABEL: test_mm256_blend_pd + // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3> + return _mm256_blend_pd(A, B, 0x05); +} + +__m256 test_mm256_blend_ps(__m256 A, __m256 B) { + // CIR-LABEL: test_mm256_blend_ps + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 8>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, #cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.float x 8> + + // LLVM-LABEL: test_mm256_blend_ps + // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7> + + // OGCG-LABEL: test_mm256_blend_ps + // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7> + return _mm256_blend_ps(A, B, 0x35); +} diff --git a/clang/test/CIR/CodeGen/X86/avx2-builtins.c b/clang/test/CIR/CodeGen/X86/avx2-builtins.c new file mode 100644 index
000000000000..b7b4733c4d42 --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/avx2-builtins.c @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck 
--check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG + +// This test mimics clang/test/CodeGen/X86/avx2-builtins.c, which eventually +// CIR shall be able to support fully. + +#include <immintrin.h> + +// FIXME: We should also lower the __builtin_ia32_pblendw128 (and similar) +// functions to this IR. In the future we could delete the corresponding +// intrinsic in LLVM if it's not being used anymore.
+__m256i test_mm256_blend_epi16(__m256i a, __m256i b) { + // CIR-LABEL: _mm256_blend_epi16 + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 16>) [#cir.int<0> : !s32i, #cir.int<17> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<8> : !s32i, #cir.int<25> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<!s16i x 16> + + // LLVM-LABEL: test_mm256_blend_epi16 + // LLVM-NOT: @llvm.x86.avx2.pblendw + // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + + // OGCG-LABEL: test_mm256_blend_epi16 + // OGCG-NOT: @llvm.x86.avx2.pblendw + // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + return _mm256_blend_epi16(a, b, 2); +} + +__m128i test_mm_blend_epi32(__m128i a, __m128i b) { + // CIR-LABEL: _mm_blend_epi32 + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 4>) [#cir.int<4> : !s32i, #cir.int<1> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!s32i x 4> + + // LLVM-LABEL: test_mm_blend_epi32 + // LLVM-NOT: @llvm.x86.avx2.pblendd.128 + // LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3> + + // OGCG-LABEL: test_mm_blend_epi32 + // OGCG-NOT: @llvm.x86.avx2.pblendd.128 + // OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3> + return _mm_blend_epi32(a, b, 0x05); +} + +__m256i test_mm256_blend_epi32(__m256i a, __m256i b) { + // CIR-LABEL: _mm256_blend_epi32 + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s32i x 8>) [#cir.int<8> : !s32i, #cir.int<1> : !s32i, #cir.int<10> : !s32i, #cir.int<3> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s32i x 8> + + // LLVM-LABEL: test_mm256_blend_epi32 + // LLVM-NOT: @llvm.x86.avx2.pblendd.256 + // LLVM: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7> + + // OGCG-LABEL:
test_mm256_blend_epi32 + // OGCG-NOT: @llvm.x86.avx2.pblendd.256 + // OGCG: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7> + return _mm256_blend_epi32(a, b, 0x35); +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/X86/sse41-builtins.c b/clang/test/CIR/CodeGen/X86/sse41-builtins.c index 814ff7bbe7eb..ba8ec065788b 100644 --- a/clang/test/CIR/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CIR/CodeGen/X86/sse41-builtins.c @@ -8,6 +8,11 @@ // RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +sse4.1 -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror // RUN: FileCheck --check-prefix=LLVM-CHECK --input-file=%t.ll %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG + // This test mimics clang/test/CodeGen/X86/sse41-builtins.c, which eventually // CIR shall be able to support fully.
@@ -82,3 +87,39 @@ __m128i test_mm_insert_epi64(__m128i x, long long b) { return _mm_insert_epi64(x, b, 1); } #endif + +__m128i test_mm_blend_epi16(__m128i V1, __m128i V2) { + // CIR-LABEL: test_mm_blend_epi16 + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!s16i x 8>) [#cir.int<0> : !s32i, #cir.int<9> : !s32i, #cir.int<2> : !s32i, #cir.int<11> : !s32i, #cir.int<4> : !s32i, #cir.int<13> : !s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i] : !cir.vector<!s16i x 8> + + // LLVM-LABEL: test_mm_blend_epi16 + // LLVM: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7> + + // OGCG-LABEL: test_mm_blend_epi16 + // OGCG: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7> + return _mm_blend_epi16(V1, V2, 42); +} + +__m128d test_mm_blend_pd(__m128d V1, __m128d V2) { + // CIR-LABEL: test_mm_blend_pd + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.double x 2>) [#cir.int<0> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.double x 2> + + // LLVM-LABEL: test_mm_blend_pd + // LLVM: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3> + + // OGCG-LABEL: test_mm_blend_pd + // OGCG: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3> + return _mm_blend_pd(V1, V2, 2); +} + +__m128 test_mm_blend_ps(__m128 V1, __m128 V2) { + // CIR-LABEL: test_mm_blend_ps + // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.float x 4>) [#cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.float x 4> + + // LLVM-LABEL: test_mm_blend_ps + // LLVM: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3> + + // OGCG-LABEL: test_mm_blend_ps + // OGCG: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3> + return _mm_blend_ps(V1, V2, 6); +}