diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 72e6bea244802..ccf07dc807ace 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1210,7 +1210,23 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_permdi256:
   case X86::BI__builtin_ia32_permdf256:
   case X86::BI__builtin_ia32_permdi512:
-  case X86::BI__builtin_ia32_permdf512:
+  case X86::BI__builtin_ia32_permdf512: {
+    unsigned imm =
+        ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
+    unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+
+    // These intrinsics operate on 256-bit lanes of four 64-bit elements.
+    int64_t indices[8];
+
+    // Each 2-bit field of the immediate picks a source element within a
+    // lane; the same selection is repeated for every 4-element lane.
+    for (unsigned l = 0; l != numElts; l += 4)
+      for (unsigned i = 0; i != 4; ++i)
+        indices[l + i] = l + ((imm >> (2 * i)) & 0x3);
+
+    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0],
+                                    ArrayRef(indices, numElts));
+  }
   case X86::BI__builtin_ia32_palignr128:
   case X86::BI__builtin_ia32_palignr256:
   case X86::BI__builtin_ia32_palignr512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/permd-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/permd-builtins.c
new file mode 100644
index 0000000000000..93ae8d9c5304d
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/permd-builtins.c
@@ -0,0 +1,62 @@
+// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+
+#include <immintrin.h>
+
+__m256i test__builtin_ia32_permdi256()
+{
+  // CIR-LABEL: test__builtin_ia32_permdi256
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<4 x !s64i>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<4 x !s64i>
+  // LLVM-LABEL: test__builtin_ia32_permdi256
+  // LLVM: shufflevector <4 x i64> {{%.*}}, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  // OGCG-LABEL: test__builtin_ia32_permdi256
+  // OGCG: shufflevector <4 x i64> {{%.*}}, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  __v4di vec = {0, 1, 2, 3};
+  return __builtin_ia32_permdi256(vec, 1);
+}
+
+__m512i test__builtin_ia32_permdi512()
+{
+  // CIR-LABEL: test__builtin_ia32_permdi512
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<8 x !s64i>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i] : !cir.vector<8 x !s64i>
+  // LLVM-LABEL: test__builtin_ia32_permdi512
+  // LLVM: shufflevector <8 x i64> {{%.*}}, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+  // OGCG-LABEL: test__builtin_ia32_permdi512
+  // OGCG: shufflevector <8 x i64> {{%.*}}, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+  __v8di vec = {0, 1, 2, 3, 4, 5, 6, 7};
+  return __builtin_ia32_permdi512(vec, 1);
+}
+
+__m256d test__builtin_ia32_permdf256()
+{
+  // CIR-LABEL: test__builtin_ia32_permdf256
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<4 x !cir.double>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<4 x !cir.double>
+  // LLVM-LABEL: test__builtin_ia32_permdf256
+  // LLVM: shufflevector <4 x double> {{%.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  // OGCG-LABEL: test__builtin_ia32_permdf256
+  // OGCG: shufflevector <4 x double> {{%.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  __v4df vec = {0, 1, 2, 3};
+  return __builtin_ia32_permdf256(vec, 1);
+}
+
+__m512d test__builtin_ia32_permdf512()
+{
+  // CIR-LABEL: test__builtin_ia32_permdf512
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<8 x !cir.double>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i] : !cir.vector<8 x !cir.double>
+  // LLVM-LABEL: test__builtin_ia32_permdf512
+  // LLVM: shufflevector <8 x double> {{%.*}}, <8 x double> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+  // OGCG-LABEL: test__builtin_ia32_permdf512
+  // OGCG: shufflevector <8 x double> {{%.*}}, <8 x double> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+  __v8df vec = {0, 1, 2, 3, 4, 5, 6, 7};
+  return __builtin_ia32_permdf512(vec, 1);
+}