File tree 1 file changed +0
-39
lines changed
ggml/src/ggml-cuda/vendors
1 file changed +0
-39
lines changed Original file line number Diff line number Diff line change 130
130
#define cudaKernelNodeParams musaKernelNodeParams
131
131
#define cudaStreamCaptureModeRelaxed musaStreamCaptureModeRelaxed
132
132
#define cudaStreamEndCapture musaStreamEndCapture
133
-
134
// XXX: Clang builtins mapping
// Route the CUDA per-byte SIMD intrinsic names to the `*_musa` stand-ins
// defined below in this file.
#define __vsub4   __vsub4_musa
#define __vcmpeq4 __vcmpeq4_musa
#define __vcmpne4 __vcmpne4_musa

// Fallback for compilers that do not provide `__has_builtin`: report every
// builtin as unavailable. The macro has to be function-like, i.e. there must be
// no whitespace between the name and the opening parenthesis; otherwise it
// becomes an object-like macro and `__has_builtin(foo)` expands to `(x) 0(foo)`.
#ifndef __has_builtin
    #define __has_builtin(x) 0
#endif

// Four unsigned 8-bit lanes packed into a single 32-bit value
// (Clang extended vector type).
typedef uint8_t uint8x4_t __attribute__((ext_vector_type(4)));
144
-
145
// Stand-in for `__vsub4` on MUSA: forwards both packed 32-bit operands to the
// `__vsubss4` intrinsic and returns its result unchanged.
// NOTE(review): in the CUDA SIMD intrinsic set `__vsubss4` is the signed-saturating
// per-byte subtract while `__vsub4` wraps around; presumably callers only pass
// lanes that cannot overflow -- confirm before relying on wraparound.
static __device__ __forceinline__ int __vsub4_musa(const int a, const int b) {
    const int packed_diff = __vsubss4(a, b);
    return packed_diff;
}
148
-
149
// Per-byte equality compare of two packed 32-bit words (stand-in for `__vcmpeq4`).
// Each of the four 8-bit lanes of the result is 0xff where the corresponding
// lanes of `a` and `b` are equal, and 0x00 where they differ.
static __device__ __forceinline__ unsigned int __vcmpeq4_musa(unsigned int a, unsigned int b) {
    unsigned int mask = 0;
#pragma unroll
    for (int lane = 0; lane < 4; ++lane) {
        const int shift = 8 * lane;  // bit offset of this 8-bit lane
        const unsigned int byte_a = (a >> shift) & 0xffu;
        const unsigned int byte_b = (b >> shift) & 0xffu;
        if (byte_a == byte_b) {
            mask |= 0xffu << shift;
        }
    }
    return mask;
}
160
-
161
// Per-byte inequality compare of two packed 32-bit words (stand-in for `__vcmpne4`).
// Each of the four 8-bit lanes of the result is 0xff where the corresponding
// lanes of `a` and `b` differ, and 0x00 where they are equal.
static __device__ __forceinline__ unsigned int __vcmpne4_musa(unsigned int a, unsigned int b) {
    unsigned int mask = 0;
#pragma unroll
    for (int lane = 0; lane < 4; ++lane) {
        const int shift = 8 * lane;  // bit offset of this 8-bit lane
        const unsigned int byte_a = (a >> shift) & 0xffu;
        const unsigned int byte_b = (b >> shift) & 0xffu;
        if (byte_a != byte_b) {
            mask |= 0xffu << shift;
        }
    }
    return mask;
}
You can’t perform that action at this time.
0 commit comments