Skip to content

Commit 632d13c

Browse files
authored
[X86] Align other variants to use void * like the 512-bit variants. (#66310)
Applies to the *_stream_* series of intrinsics.
1 parent 65341b0 commit 632d13c

13 files changed

+98
-18
lines changed

clang/lib/Headers/ammintrin.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,9 @@ _mm_insert_si64(__m128i __x, __m128i __y)
155155
/// \param __a
156156
/// The 64-bit double-precision floating-point register value to be stored.
157157
static __inline__ void __DEFAULT_FN_ATTRS
158-
_mm_stream_sd(double *__p, __m128d __a)
158+
_mm_stream_sd(void *__p, __m128d __a)
159159
{
160-
__builtin_ia32_movntsd(__p, (__v2df)__a);
160+
__builtin_ia32_movntsd((double *)__p, (__v2df)__a);
161161
}
162162

163163
/// Stores a 32-bit single-precision floating-point value in a 32-bit
@@ -173,9 +173,9 @@ _mm_stream_sd(double *__p, __m128d __a)
173173
/// \param __a
174174
/// The 32-bit single-precision floating-point register value to be stored.
175175
static __inline__ void __DEFAULT_FN_ATTRS
176-
_mm_stream_ss(float *__p, __m128 __a)
176+
_mm_stream_ss(void *__p, __m128 __a)
177177
{
178-
__builtin_ia32_movntss(__p, (__v4sf)__a);
178+
__builtin_ia32_movntss((float *)__p, (__v4sf)__a);
179179
}
180180

181181
#undef __DEFAULT_FN_ATTRS

clang/lib/Headers/avx2intrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
29792979
/// A pointer to the 32-byte aligned memory containing the vector to load.
29802980
/// \returns A 256-bit integer vector loaded from memory.
29812981
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2982-
_mm256_stream_load_si256(__m256i const *__V)
2982+
_mm256_stream_load_si256(const void *__V)
29832983
{
29842984
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
29852985
return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);

clang/lib/Headers/avxintrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
35633563
/// \param __b
35643564
/// A 256-bit integer vector containing the values to be moved.
35653565
static __inline void __DEFAULT_FN_ATTRS
3566-
_mm256_stream_si256(__m256i *__a, __m256i __b)
3566+
_mm256_stream_si256(void *__a, __m256i __b)
35673567
{
35683568
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
35693569
__builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
@@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
35833583
/// \param __b
35843584
/// A 256-bit vector of [4 x double] containing the values to be moved.
35853585
static __inline void __DEFAULT_FN_ATTRS
3586-
_mm256_stream_pd(double *__a, __m256d __b)
3586+
_mm256_stream_pd(void *__a, __m256d __b)
35873587
{
35883588
typedef __v4df __v4df_aligned __attribute__((aligned(32)));
35893589
__builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
@@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b)
36043604
/// \param __a
36053605
/// A 256-bit vector of [8 x float] containing the values to be moved.
36063606
static __inline void __DEFAULT_FN_ATTRS
3607-
_mm256_stream_ps(float *__p, __m256 __a)
3607+
_mm256_stream_ps(void *__p, __m256 __a)
36083608
{
36093609
typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
36103610
__builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);

clang/lib/Headers/emmintrin.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p,
39453945
/// A pointer to the 128-bit aligned memory location used to store the value.
39463946
/// \param __a
39473947
/// A vector of [2 x double] containing the 64-bit values to be stored.
3948-
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
3948+
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p,
39493949
__m128d __a) {
39503950
__builtin_nontemporal_store((__v2df)__a, (__v2df *)__p);
39513951
}
@@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
39633963
/// A pointer to the 128-bit aligned memory location used to store the value.
39643964
/// \param __a
39653965
/// A 128-bit integer vector containing the values to be stored.
3966-
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
3966+
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p,
39673967
__m128i __a) {
39683968
__builtin_nontemporal_store((__v2di)__a, (__v2di *)__p);
39693969
}
@@ -3983,8 +3983,8 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
39833983
/// A 32-bit integer containing the value to be stored.
39843984
static __inline__ void
39853985
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
3986-
_mm_stream_si32(int *__p, int __a) {
3987-
__builtin_ia32_movnti(__p, __a);
3986+
_mm_stream_si32(void *__p, int __a) {
3987+
__builtin_ia32_movnti((int *)__p, __a);
39883988
}
39893989

39903990
#ifdef __x86_64__
@@ -4003,8 +4003,8 @@ static __inline__ void
40034003
/// A 64-bit integer containing the value to be stored.
40044004
static __inline__ void
40054005
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
4006-
_mm_stream_si64(long long *__p, long long __a) {
4007-
__builtin_ia32_movnti64(__p, __a);
4006+
_mm_stream_si64(void *__p, long long __a) {
4007+
__builtin_ia32_movnti64((long long *)__p, __a);
40084008
}
40094009
#endif
40104010

clang/lib/Headers/smmintrin.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
645645
/// \returns A 128-bit integer vector containing the data stored at the
646646
/// specified memory location.
647647
static __inline__ __m128i __DEFAULT_FN_ATTRS
648-
_mm_stream_load_si128(__m128i const *__V) {
648+
_mm_stream_load_si128(const void *__V) {
649649
return (__m128i)__builtin_nontemporal_load((const __v2di *)__V);
650650
}
651651

clang/lib/Headers/xmmintrin.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2121,9 +2121,9 @@ _mm_storer_ps(float *__p, __m128 __a)
21212121
/// \param __a
21222122
/// A 64-bit integer containing the value to be stored.
21232123
static __inline__ void __DEFAULT_FN_ATTRS_MMX
2124-
_mm_stream_pi(__m64 *__p, __m64 __a)
2124+
_mm_stream_pi(void *__p, __m64 __a)
21252125
{
2126-
__builtin_ia32_movntq(__p, __a);
2126+
__builtin_ia32_movntq((__m64 *)__p, __a);
21272127
}
21282128

21292129
/// Moves packed float values from a 128-bit vector of [4 x float] to a
@@ -2140,7 +2140,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a)
21402140
/// \param __a
21412141
/// A 128-bit vector of [4 x float] containing the values to be moved.
21422142
static __inline__ void __DEFAULT_FN_ATTRS
2143-
_mm_stream_ps(float *__p, __m128 __a)
2143+
_mm_stream_ps(void *__p, __m128 __a)
21442144
{
21452145
__builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
21462146
}

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1940,18 +1940,36 @@ void test_mm256_stream_pd(double* A, __m256d B) {
19401940
_mm256_stream_pd(A, B);
19411941
}
19421942

1943+
void test_mm256_stream_pd_void(void *A, __m256d B) {
1944+
// CHECK-LABEL: test_mm256_stream_pd_void
1945+
// CHECK: store <4 x double> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
1946+
_mm256_stream_pd(A, B);
1947+
}
1948+
19431949
void test_mm256_stream_ps(float* A, __m256 B) {
19441950
// CHECK-LABEL: test_mm256_stream_ps
19451951
// CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
19461952
_mm256_stream_ps(A, B);
19471953
}
19481954

1955+
void test_mm256_stream_ps_void(void *A, __m256 B) {
1956+
// CHECK-LABEL: test_mm256_stream_ps_void
1957+
// CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
1958+
_mm256_stream_ps(A, B);
1959+
}
1960+
19491961
void test_mm256_stream_si256(__m256i* A, __m256i B) {
19501962
// CHECK-LABEL: test_mm256_stream_si256
19511963
// CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
19521964
_mm256_stream_si256(A, B);
19531965
}
19541966

1967+
void test_mm256_stream_si256_void(void *A, __m256i B) {
1968+
// CHECK-LABEL: test_mm256_stream_si256_void
1969+
// CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
1970+
_mm256_stream_si256(A, B);
1971+
}
1972+
19551973
__m256d test_mm256_sub_pd(__m256d A, __m256d B) {
19561974
// CHECK-LABEL: test_mm256_sub_pd
19571975
// CHECK: fsub <4 x double>

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,6 +1223,12 @@ __m256i test_mm256_stream_load_si256(__m256i const *a) {
12231223
return _mm256_stream_load_si256(a);
12241224
}
12251225

1226+
__m256i test_mm256_stream_load_si256_void(const void *a) {
1227+
// CHECK-LABEL: test_mm256_stream_load_si256_void
1228+
// CHECK: load <4 x i64>, ptr %{{.*}}, align 32, !nontemporal
1229+
return _mm256_stream_load_si256(a);
1230+
}
1231+
12261232
__m256i test_mm256_sub_epi8(__m256i a, __m256i b) {
12271233
// CHECK-LABEL: test_mm256_sub_epi8
12281234
// CHECK: sub <32 x i8>

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,12 @@ void test_mm_stream_pi(__m64 *p, __m64 a) {
601601
_mm_stream_pi(p, a);
602602
}
603603

604+
void test_mm_stream_pi_void(void *p, __m64 a) {
605+
// CHECK-LABEL: test_mm_stream_pi_void
606+
// CHECK: call void @llvm.x86.mmx.movnt.dq
607+
_mm_stream_pi(p, a);
608+
}
609+
604610
__m64 test_mm_sub_pi8(__m64 a, __m64 b) {
605611
// CHECK-LABEL: test_mm_sub_pi8
606612
// CHECK: call x86_mmx @llvm.x86.mmx.psub.b

clang/test/CodeGen/X86/sse-builtins.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,12 @@ void test_mm_stream_ps(float*A, __m128 B) {
720720
_mm_stream_ps(A, B);
721721
}
722722

723+
void test_mm_stream_ps_void(void *A, __m128 B) {
724+
// CHECK-LABEL: test_mm_stream_ps_void
725+
// CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
726+
_mm_stream_ps(A, B);
727+
}
728+
723729
__m128 test_mm_sub_ps(__m128 A, __m128 B) {
724730
// CHECK-LABEL: test_mm_sub_ps
725731
// CHECK: fsub <4 x float>

clang/test/CodeGen/X86/sse2-builtins.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,18 +1488,36 @@ void test_mm_stream_pd(double *A, __m128d B) {
14881488
_mm_stream_pd(A, B);
14891489
}
14901490

1491+
void test_mm_stream_pd_void(void *A, __m128d B) {
1492+
// CHECK-LABEL: test_mm_stream_pd_void
1493+
// CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1494+
_mm_stream_pd(A, B);
1495+
}
1496+
14911497
void test_mm_stream_si32(int *A, int B) {
14921498
// CHECK-LABEL: test_mm_stream_si32
14931499
// CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
14941500
_mm_stream_si32(A, B);
14951501
}
14961502

1503+
void test_mm_stream_si32_void(void *A, int B) {
1504+
// CHECK-LABEL: test_mm_stream_si32_void
1505+
// CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1506+
_mm_stream_si32(A, B);
1507+
}
1508+
14971509
#ifdef __x86_64__
14981510
void test_mm_stream_si64(long long *A, long long B) {
14991511
// X64-LABEL: test_mm_stream_si64
15001512
// X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
15011513
_mm_stream_si64(A, B);
15021514
}
1515+
1516+
void test_mm_stream_si64_void(void *A, long long B) {
1517+
// X64-LABEL: test_mm_stream_si64_void
1518+
// X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
1519+
_mm_stream_si64(A, B);
1520+
}
15031521
#endif
15041522

15051523
void test_mm_stream_si128(__m128i *A, __m128i B) {
@@ -1508,6 +1526,12 @@ void test_mm_stream_si128(__m128i *A, __m128i B) {
15081526
_mm_stream_si128(A, B);
15091527
}
15101528

1529+
void test_mm_stream_si128_void(void *A, __m128i B) {
1530+
// CHECK-LABEL: test_mm_stream_si128_void
1531+
// CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
1532+
_mm_stream_si128(A, B);
1533+
}
1534+
15111535
__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
15121536
// CHECK-LABEL: test_mm_sub_epi8
15131537
// CHECK: sub <16 x i8>

clang/test/CodeGen/X86/sse41-builtins.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,12 @@ __m128i test_mm_stream_load_si128(__m128i const *a) {
358358
return _mm_stream_load_si128(a);
359359
}
360360

361+
__m128i test_mm_stream_load_si128_void(const void *a) {
362+
// CHECK-LABEL: test_mm_stream_load_si128_void
363+
// CHECK: load <2 x i64>, ptr %{{.*}}, align 16, !nontemporal
364+
return _mm_stream_load_si128(a);
365+
}
366+
361367
int test_mm_test_all_ones(__m128i x) {
362368
// CHECK-LABEL: test_mm_test_all_ones
363369
// CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})

clang/test/CodeGen/X86/sse4a-builtins.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,23 @@ void test_mm_stream_sd(double *p, __m128d a) {
3737
_mm_stream_sd(p, a);
3838
}
3939

40+
void test_mm_stream_sd_void(void *p, __m128d a) {
41+
// CHECK-LABEL: test_mm_stream_sd_void
42+
// CHECK: extractelement <2 x double> %{{.*}}, i64 0
43+
// CHECK: store double %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
44+
_mm_stream_sd(p, a);
45+
}
46+
4047
void test_mm_stream_ss(float *p, __m128 a) {
4148
// CHECK-LABEL: test_mm_stream_ss
4249
// CHECK: extractelement <4 x float> %{{.*}}, i64 0
4350
// CHECK: store float %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
4451
_mm_stream_ss(p, a);
4552
}
53+
54+
void test_mm_stream_ss_void(void *p, __m128 a) {
55+
// CHECK-LABEL: test_mm_stream_ss_void
56+
// CHECK: extractelement <4 x float> %{{.*}}, i64 0
57+
// CHECK: store float %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
58+
_mm_stream_ss(p, a);
59+
}

0 commit comments

Comments
 (0)