-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[Clang][X86] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow VPERMILPD/S variable mask intrinsics to be used in constexpr #168861
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang Author: None (stomfaig) Changes: Allowing intrinsics to be used in constexpr. Resolves #167878. Patch is 24.42 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/168861.diff 9 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3742746def75f..d842988d92812 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -494,11 +494,14 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -2378,9 +2381,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c63c2ce83c76f..8bccac746fb51 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4631,6 +4631,28 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::make_pair(0, static_cast<int>(LaneBase + Sel));
});
+ case X86::BI__builtin_ia32_vpermilvarpd:
+ case X86::BI__builtin_ia32_vpermilvarpd256:
+ case X86::BI__builtin_ia32_vpermilvarpd512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned NumElemPerLane = 2;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ });
+
+ case X86::BI__builtin_ia32_vpermilvarps:
+ case X86::BI__builtin_ia32_vpermilvarps256:
+ case X86::BI__builtin_ia32_vpermilvarps512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned NumElemPerLane = 4;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = ShuffleMask & 0b11;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ });
+
case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilpd512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a9cff7f88d6f2..0ee748075a6e0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13043,6 +13043,22 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_vpermilvarpd:
+ case X86::BI__builtin_ia32_vpermilvarpd256:
+ case X86::BI__builtin_ia32_vpermilvarpd512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ unsigned NumElemPerLane = 2;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = Mask & 0b10 ? 1 : 0;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ }))
+ return false;
+ return Success(R, E);
+ }
+
case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilpd512: {
@@ -13062,6 +13078,22 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_vpermilvarps:
+ case X86::BI__builtin_ia32_vpermilvarps256:
+ case X86::BI__builtin_ia32_vpermilvarps512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ unsigned NumElemPerLane = 4;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = Mask & 0b11;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ }))
+ return false;
+ return Success(R, E);
+ }
+
case X86::BI__builtin_ia32_phminposuw128: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e4184795e47e9..3f5028f335155 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5879,13 +5879,13 @@ _mm_cvttss_u64 (__m128 __A)
(__v16sf)_mm512_permute_ps((X), (C)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5893,7 +5893,7 @@ _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5901,13 +5901,13 @@ _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -5915,7 +5915,7 @@ _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 5a1b540e07e3a..e7407bd1c722c 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5847,7 +5847,7 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
(__v8sf)_mm256_permute_ps((X), (C)), \
(__v8sf)_mm256_setzero_ps()))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
{
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5855,7 +5855,7 @@ _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
(__v2df)__W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
{
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5863,7 +5863,7 @@ _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
{
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5871,7 +5871,7 @@ _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
(__v4df)__W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
{
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5879,7 +5879,7 @@ _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
{
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5887,7 +5887,7 @@ _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
{
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5895,7 +5895,7 @@ _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
{
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5903,7 +5903,7 @@ _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
{
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 3e1618ed192c8..1ea15b3e68811 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -787,7 +787,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
/// 1: Bits [127:64] of the source are copied to bits [127:64] of the
/// returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
-static __inline __m128d __DEFAULT_FN_ATTRS128
+static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutevar_pd(__m128d __a, __m128i __c)
{
return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
@@ -826,7 +826,7 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
/// 1: Bits [255:192] of the source are copied to bits [255:192] of the
/// returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_permutevar_pd(__m256d __a, __m256i __c)
{
return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
@@ -881,7 +881,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
/// 11: Bits [127:96] of the source are copied to bits [127:96] of the
/// returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
+static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutevar_ps(__m128 __a, __m128i __c)
{
return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
@@ -972,7 +972,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
/// 11: Bits [255:224] of the source are copied to bits [255:224] of the
/// returned vector.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_permutevar_ps(__m256 __a, __m256i __c)
{
return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index f8931e7e55410..0c0c8bb3326aa 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1454,24 +1454,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}})
return _mm_permutevar_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_permutevar_pd(
+ ((__m128d){0.0, 1.0}),
+ ((__m128i){0b10, 0b00})
+ ),
+ 1.0, 0.0
+));
__m256d test_mm256_permutevar_pd(__m256d A, __m256i B) {
// CHECK-LABEL: test_mm256_permutevar_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_permutevar_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_permutevar_pd(
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ ((__m256i){0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 2.0
+));
__m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
// CHECK-LABEL: test_mm_permutevar_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}})
return _mm_permutevar_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_permutevar_ps(
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0
+));
__m256 test_mm256_permutevar_ps(__m256 A, __m256i B) {
// CHECK-LABEL: test_mm256_permutevar_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_permutevar_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_permutevar_ps(
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m256i){(0b10ULL << 32) + 0b11, 0b01, (0b10ULL << 32) + 0b11, 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0
+));
__m256 test_mm256_rcp_ps(__m256 A) {
// CHECK-LABEL: test_mm256_rcp_ps
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index e4a9d9cb3781d..9273ca89516cb 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -5588,6 +5588,13 @@ __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
// CHECK: @llvm.x86.avx512.vpermilvar.pd.512
return _mm512_permutevar_pd(__A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_permutevar_pd(
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0
+));
__m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_permutevar_pd
@@ -5595,6 +5602,15 @@ __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_permutevar_pd(
+ ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ (__mmask8)0b01010101,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 9.0, 3.0, 11.0, 5.0, 13.0, 7.0, 15.0
+));
__m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_permutevar_pd
@@ -5602,12 +5618,27 @@ __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_permutevar_pd(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_maskz_permutevar_pd(
+ (__mmask8)0b01010101,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0
+));
__m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_permutevar_ps
// CHECK: @llvm.x86.avx512.vpermilvar.ps.512
return _mm512_permutevar_ps(__A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_permutevar_ps(
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0, 11.0, 10.0, 9.0, 8.0, 15.0, 14.0, 13.0, 12.0
+));
__m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_permutevar_ps
@@ -5615,6 +5646,15 @@ __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m5
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_permutevar_ps(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_permutevar_ps(
+ ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0}),
+ (__mmask16)0b0101010101010101,
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 17.0, 1.0, 19.0, 7.0, 21.0, 5.0, 23.0, 11.0, 25.0, 9.0, 27.0, 15.0, 29.0, 13.0, 31.0
+));
__m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_permutevar_ps
@@ -5622,6 +5662,14 @@ __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_permutevar_ps(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_maskz_permutevar_ps(
+ (__mmask16)0b0101010101010101,
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0, 11.0, 0.0, 9.0, 0.0, 15.0, 0.0, 13.0, 0.0
+));
__m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
// CHECK-LABEL: test_mm512_permutex2var_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 69adc75c80f1c..8192a32cf5113 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8131,6 +8131,15 @@ __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m12
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONS...
[truncated]
|
|
@llvm/pr-subscribers-backend-x86 Author: None (stomfaig) Changes: Allowing intrinsics to be used in constexpr. Resolves #167878. Patch is 24.42 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/168861.diff 9 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 3742746def75f..d842988d92812 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -494,11 +494,14 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
+}
+
+let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -2378,9 +2381,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
-}
-
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index c63c2ce83c76f..8bccac746fb51 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -4631,6 +4631,28 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::make_pair(0, static_cast<int>(LaneBase + Sel));
});
+ case X86::BI__builtin_ia32_vpermilvarpd:
+ case X86::BI__builtin_ia32_vpermilvarpd256:
+ case X86::BI__builtin_ia32_vpermilvarpd512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned NumElemPerLane = 2;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = ShuffleMask & 0b10 ? 1 : 0;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ });
+
+ case X86::BI__builtin_ia32_vpermilvarps:
+ case X86::BI__builtin_ia32_vpermilvarps256:
+ case X86::BI__builtin_ia32_vpermilvarps512:
+ return interp__builtin_ia32_shuffle_generic(
+ S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
+ unsigned NumElemPerLane = 4;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = ShuffleMask & 0b11;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ });
+
case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilpd512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index a9cff7f88d6f2..0ee748075a6e0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13043,6 +13043,22 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_vpermilvarpd:
+ case X86::BI__builtin_ia32_vpermilvarpd256:
+ case X86::BI__builtin_ia32_vpermilvarpd512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ unsigned NumElemPerLane = 2;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = Mask & 0b10 ? 1 : 0;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ }))
+ return false;
+ return Success(R, E);
+ }
+
case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilpd512: {
@@ -13062,6 +13078,22 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}
+ case X86::BI__builtin_ia32_vpermilvarps:
+ case X86::BI__builtin_ia32_vpermilvarps256:
+ case X86::BI__builtin_ia32_vpermilvarps512: {
+ APValue R;
+ if (!evalShuffleGeneric(
+ Info, E, R,
+ [](unsigned DstIdx, unsigned Mask) -> std::pair<unsigned, int> {
+ unsigned NumElemPerLane = 4;
+ unsigned Lane = DstIdx / NumElemPerLane;
+ unsigned Offset = Mask & 0b11;
+ return std::make_pair(0, static_cast<int>(Lane * NumElemPerLane + Offset));
+ }))
+ return false;
+ return Success(R, E);
+ }
+
case X86::BI__builtin_ia32_phminposuw128: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index e4184795e47e9..3f5028f335155 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -5879,13 +5879,13 @@ _mm_cvttss_u64 (__m128 __A)
(__v16sf)_mm512_permute_ps((X), (C)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5893,7 +5893,7 @@ _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5901,13 +5901,13 @@ _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -5915,7 +5915,7 @@ _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 5a1b540e07e3a..e7407bd1c722c 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -5847,7 +5847,7 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
(__v8sf)_mm256_permute_ps((X), (C)), \
(__v8sf)_mm256_setzero_ps()))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
{
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5855,7 +5855,7 @@ _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
(__v2df)__W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
{
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5863,7 +5863,7 @@ _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
{
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5871,7 +5871,7 @@ _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
(__v4df)__W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
{
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5879,7 +5879,7 @@ _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
{
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5887,7 +5887,7 @@ _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
{
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5895,7 +5895,7 @@ _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
{
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5903,7 +5903,7 @@ _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
{
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 3e1618ed192c8..1ea15b3e68811 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -787,7 +787,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_hsub_ps(__m256 __a,
/// 1: Bits [127:64] of the source are copied to bits [127:64] of the
/// returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
-static __inline __m128d __DEFAULT_FN_ATTRS128
+static __inline __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutevar_pd(__m128d __a, __m128i __c)
{
return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
@@ -826,7 +826,7 @@ _mm_permutevar_pd(__m128d __a, __m128i __c)
/// 1: Bits [255:192] of the source are copied to bits [255:192] of the
/// returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_permutevar_pd(__m256d __a, __m256i __c)
{
return (__m256d)__builtin_ia32_vpermilvarpd256((__v4df)__a, (__v4di)__c);
@@ -881,7 +881,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
/// 11: Bits [127:96] of the source are copied to bits [127:96] of the
/// returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
+static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_permutevar_ps(__m128 __a, __m128i __c)
{
return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
@@ -972,7 +972,7 @@ _mm_permutevar_ps(__m128 __a, __m128i __c)
/// 11: Bits [255:224] of the source are copied to bits [255:224] of the
/// returned vector.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_permutevar_ps(__m256 __a, __m256i __c)
{
return (__m256)__builtin_ia32_vpermilvarps256((__v8sf)__a, (__v8si)__c);
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index f8931e7e55410..0c0c8bb3326aa 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1454,24 +1454,52 @@ __m128d test_mm_permutevar_pd(__m128d A, __m128i B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %{{.*}}, <2 x i64> %{{.*}})
return _mm_permutevar_pd(A, B);
}
+TEST_CONSTEXPR(match_m128d(
+ _mm_permutevar_pd(
+ ((__m128d){0.0, 1.0}),
+ ((__m128i){0b10, 0b00})
+ ),
+ 1.0, 0.0
+));
__m256d test_mm256_permutevar_pd(__m256d A, __m256i B) {
// CHECK-LABEL: test_mm256_permutevar_pd
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %{{.*}}, <4 x i64> %{{.*}})
return _mm256_permutevar_pd(A, B);
}
+TEST_CONSTEXPR(match_m256d(
+ _mm256_permutevar_pd(
+ ((__m256d){0.0, 1.0, 2.0, 3.0}),
+ ((__m256i){0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 2.0
+));
__m128 test_mm_permutevar_ps(__m128 A, __m128i B) {
// CHECK-LABEL: test_mm_permutevar_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %{{.*}}, <4 x i32> %{{.*}})
return _mm_permutevar_ps(A, B);
}
+TEST_CONSTEXPR(match_m128(
+ _mm_permutevar_ps(
+ ((__m128){0.0, 1.0, 2.0, 3.0}),
+ ((__m128i){0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0
+));
__m256 test_mm256_permutevar_ps(__m256 A, __m256i B) {
// CHECK-LABEL: test_mm256_permutevar_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_permutevar_ps(A, B);
}
+TEST_CONSTEXPR(match_m256(
+ _mm256_permutevar_ps(
+ ((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m256i){(0b10ULL << 32) + 0b11, 0b01, (0b10ULL << 32) + 0b11, 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0
+));
__m256 test_mm256_rcp_ps(__m256 A) {
// CHECK-LABEL: test_mm256_rcp_ps
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index e4a9d9cb3781d..9273ca89516cb 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -5588,6 +5588,13 @@ __m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
// CHECK: @llvm.x86.avx512.vpermilvar.pd.512
return _mm512_permutevar_pd(__A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_permutevar_pd(
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0
+));
__m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_permutevar_pd
@@ -5595,6 +5602,15 @@ __m512d test_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_mask_permutevar_pd(
+ ((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ (__mmask8)0b01010101,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 9.0, 3.0, 11.0, 5.0, 13.0, 7.0, 15.0
+));
__m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_permutevar_pd
@@ -5602,12 +5618,27 @@ __m512d test_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) {
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_permutevar_pd(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m512d(
+ _mm512_maskz_permutevar_pd(
+ (__mmask8)0b01010101,
+ ((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
+ ((__m512i){0b10, 0b00, 0b10, 0b00, 0b10, 0b00, 0b10, 0b00})
+ ),
+ 1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0
+));
__m512 test_mm512_permutevar_ps(__m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_permutevar_ps
// CHECK: @llvm.x86.avx512.vpermilvar.ps.512
return _mm512_permutevar_ps(__A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_permutevar_ps(
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0, 11.0, 10.0, 9.0, 8.0, 15.0, 14.0, 13.0, 12.0
+));
__m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_permutevar_ps
@@ -5615,6 +5646,15 @@ __m512 test_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m5
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_permutevar_ps(__W, __U, __A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_mask_permutevar_ps(
+ ((__m512){16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0}),
+ (__mmask16)0b0101010101010101,
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 17.0, 1.0, 19.0, 7.0, 21.0, 5.0, 23.0, 11.0, 25.0, 9.0, 27.0, 15.0, 29.0, 13.0, 31.0
+));
__m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_permutevar_ps
@@ -5622,6 +5662,14 @@ __m512 test_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) {
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_permutevar_ps(__U, __A, __C);
}
+TEST_CONSTEXPR(match_m512(
+ _mm512_maskz_permutevar_ps(
+ (__mmask16)0b0101010101010101,
+ ((__m512){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
+ ((__m512i){0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01, 0b11 + (0b10ULL << 32), 0b01})
+ ),
+ 3.0, 0.0, 1.0, 0.0, 7.0, 0.0, 5.0, 0.0, 11.0, 0.0, 9.0, 0.0, 15.0, 0.0, 13.0, 0.0
+));
__m512i test_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) {
// CHECK-LABEL: test_mm512_permutex2var_epi32
diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c
index 69adc75c80f1c..8192a32cf5113 100644
--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@@ -8131,6 +8131,15 @@ __m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m12
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_permutevar_pd(__W, __U, __A, __C);
}
+TEST_CONS...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
|
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks!
| def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">; | ||
| def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">; | ||
| def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">; | ||
| def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
move these to an existing block with the same features/attributes
| TEST_CONSTEXPR(match_m256d( | ||
| _mm256_permutevar_pd( | ||
| ((__m256d){0.0, 1.0, 2.0, 3.0}), | ||
| ((__m256i){0b10, 0b00, 0b10, 0b00}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Avoid matching lane masks? It helps avoid bugs where we've reused the wrong lane offsets.
| TEST_CONSTEXPR(match_m128( | ||
| _mm_permutevar_ps( | ||
| ((__m128){0.0, 1.0, 2.0, 3.0}), | ||
| ((__m128i){0b11 + (0b10ULL << 32), 0b01}) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to use the ((__m128i)(__v4si){..., ..., ..., ...}) style.
You can use this format. |
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/52/builds/13050 Here is the relevant piece of the build log for reference: |
… allow VPERMILPD/S variable mask intrinsics to be used in constexpr (llvm#168861) Allowing VPERMILPD/S intrinsics to be used in constexpr Closes llvm#167878
… allow VPERMILPD/S variable mask intrinsics to be used in constexpr (llvm#168861) Allowing VPERMILPD/S intrinsics to be used in constexpr Closes llvm#167878
… allow VPERMILPD/S variable mask intrinsics to be used in constexpr (llvm#168861) Allowing VPERMILPD/S intrinsics to be used in constexpr Closes llvm#167878
Allowing intrinsics to be used in constexpr
Closes #167878