Skip to content

Commit 0df1362

Browse files
committed
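[CUDA] Fix order of memcpy arguments in __shfl_*(<64-bit type>).
The destination and source arguments of memcpy were swapped, so the input value was never copied into the temporary that gets shuffled; this resulted in broken shfl ops for 64-bit types. (cherry picked from commit cc14de8)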
1 parent d11abdd commit 0df1362

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

clang/lib/Headers/__clang_cuda_intrinsics.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
45 45
_Static_assert(sizeof(__val) == sizeof(__Bits)); \
46 46
_Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
47 47
__Bits __tmp; \
48 -
memcpy(&__val, &__tmp, sizeof(__val)); \
48 +
memcpy(&__tmp, &__val, sizeof(__val)); \
49 49
__tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \
50 50
__tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \
51 51
long long __ret; \
@@ -129,7 +129,7 @@ __MAKE_SHUFFLES(__shfl_xor, __nvvm_shfl_bfly_i32, __nvvm_shfl_bfly_f32, 0x1f,
129 129
_Static_assert(sizeof(__val) == sizeof(__Bits)); \
130 130
_Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
131 131
__Bits __tmp; \
132 -
memcpy(&__val, &__tmp, sizeof(__val)); \
132 +
memcpy(&__tmp, &__val, sizeof(__val)); \
133 133
__tmp.__a = ::__FnName(__mask, __tmp.__a, __offset, __width); \
134 134
__tmp.__b = ::__FnName(__mask, __tmp.__b, __offset, __width); \
135 135
long long __ret; \

0 commit comments

Comments
 (0)