Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,21 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_permdi256:
case X86::BI__builtin_ia32_permdf256:
case X86::BI__builtin_ia32_permdi512:
case X86::BI__builtin_ia32_permdf512:
case X86::BI__builtin_ia32_permdf512: {
  // The second operand carries the immediate control value; read it from the
  // constant op that defines it.
  auto immOp = ops[1].getDefiningOp<cir::ConstantOp>();
  const unsigned selector = immOp.getIntValue().getZExtValue();
  const unsigned eltCount = cast<cir::VectorType>(ops[0].getType()).getSize();

  // These intrinsics operate on 256-bit lanes of four 64-bit elements. Every
  // lane reuses the same four 2-bit fields of the immediate, each field
  // choosing one source element from within that lane.
  int64_t shuffleMask[8];
  for (unsigned laneBase = 0; laneBase != eltCount; laneBase += 4) {
    for (unsigned slot = 0; slot != 4; ++slot) {
      const unsigned pick = (selector >> (2 * slot)) & 0x3;
      shuffleMask[laneBase + slot] = laneBase + pick;
    }
  }

  const auto loc = getLoc(expr->getExprLoc());
  return builder.createVecShuffle(loc, ops[0],
                                  ArrayRef(shuffleMask, eltCount));
}
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512:
Expand Down
62 changes: 62 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/permd-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
// RUN: %clang_cc1 -x c++ -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG

#include <immintrin.h>

// Exercises __builtin_ia32_permdi256 on a vector of four 64-bit integers with
// an immediate of 1. Read as four 2-bit selectors (lowest bits first), the
// immediate yields element indices 1, 0, 0, 0, which is the shuffle mask that
// every check line in this function expects.
__m256i test__builtin_ia32_permdi256()
{
// CIR-LABEL: test__builtin_ia32_permdi256
// CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<4 x !s64i>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<4 x !s64i>
// LLVM-LABEL: test__builtin_ia32_permdi256
// LLVM: shufflevector <4 x i64> {{%.*}}, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
// OGCG-LABEL: test__builtin_ia32_permdi256
// OGCG: shufflevector <4 x i64> {{%.*}}, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
__v4di vec = {0, 1, 2, 3};
return __builtin_ia32_permdi256(vec, 1);
}

// Exercises __builtin_ia32_permdi512 on a vector of eight 64-bit integers with
// an immediate of 1. The same 2-bit selectors (1, 0, 0, 0) are applied to each
// group of four elements, offset by the group's first index, so the expected
// shuffle mask is 1, 0, 0, 0, 5, 4, 4, 4.
__m512i test__builtin_ia32_permdi512()
{
// CIR-LABEL: test__builtin_ia32_permdi512
// CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<8 x !s64i>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i] : !cir.vector<8 x !s64i>
// LLVM-LABEL: test__builtin_ia32_permdi512
// LLVM: shufflevector <8 x i64> {{%.*}}, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
// OGCG-LABEL: test__builtin_ia32_permdi512
// OGCG: shufflevector <8 x i64> {{%.*}}, <8 x i64> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
__v8di vec = {0, 1, 2, 3, 4, 5, 6, 7};
return __builtin_ia32_permdi512(vec, 1);
}

// Exercises __builtin_ia32_permdf256 on a vector of four doubles with an
// immediate of 1. Read as four 2-bit selectors (lowest bits first), the
// immediate yields element indices 1, 0, 0, 0, which is the shuffle mask that
// every check line in this function expects.
__m256d test__builtin_ia32_permdf256()
{
// CIR-LABEL: test__builtin_ia32_permdf256
// CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<4 x !cir.double>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i] : !cir.vector<4 x !cir.double>
// LLVM-LABEL: test__builtin_ia32_permdf256
// LLVM: shufflevector <4 x double> {{%.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
// OGCG-LABEL: test__builtin_ia32_permdf256
// OGCG: shufflevector <4 x double> {{%.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
__v4df vec = {0, 1, 2, 3};
return __builtin_ia32_permdf256(vec, 1);
}

// Exercises __builtin_ia32_permdf512 on a vector of eight doubles with an
// immediate of 1. The same 2-bit selectors (1, 0, 0, 0) are applied to each
// group of four elements, offset by the group's first index, so the expected
// shuffle mask is 1, 0, 0, 0, 5, 4, 4, 4.
__m512d test__builtin_ia32_permdf512()
{
// CIR-LABEL: test__builtin_ia32_permdf512
// CIR: cir.vec.shuffle({{%.*}}, {{%.*}}: !cir.vector<8 x !cir.double>) [#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i] : !cir.vector<8 x !cir.double>
// LLVM-LABEL: test__builtin_ia32_permdf512
// LLVM: shufflevector <8 x double> {{%.*}}, <8 x double> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
// OGCG-LABEL: test__builtin_ia32_permdf512
// OGCG: shufflevector <8 x double> {{%.*}}, <8 x double> poison, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
__v8df vec = {0, 1, 2, 3, 4, 5, 6, 7};
return __builtin_ia32_permdf512(vec, 1);
}