2
2
3
3
#if defined(_MSC_VER ) || defined(__MINGW32__ )
4
4
#include <malloc.h> // using malloc.h with MSC/MINGW
5
- #elif !defined(__FreeBSD__ )
5
+ #elif !defined(__FreeBSD__ ) && !defined( __NetBSD__ )
6
6
#include <alloca.h>
7
7
#endif
8
8
@@ -364,7 +364,7 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
364
364
#if __AVX2__
365
365
// Unpack 32 4-bit fields into 32 bytes
366
366
// The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
367
- inline __m256i bytesFromNibbles ( const uint8_t * rsi )
367
+ static inline __m256i bytesFromNibbles ( const uint8_t * rsi )
368
368
{
369
369
// Load 16 bytes from memory
370
370
__m128i tmp = _mm_loadu_si128 ( ( const __m128i * )rsi );
@@ -381,7 +381,7 @@ inline __m256i bytesFromNibbles( const uint8_t* rsi )
381
381
return bytes ;
382
382
}
383
383
384
- inline __m128i packNibbles ( __m256i bytes )
384
+ static inline __m128i packNibbles ( __m256i bytes )
385
385
{
386
386
// Move bits within 16-bit lanes from 0000_abcd_0000_efgh into 0000_0000_abcd_efgh
387
387
const __m256i lowByte = _mm256_set1_epi16 ( 0xFF );
@@ -407,8 +407,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
407
407
const int nb = k / QK ;
408
408
const size_t bs = sizeof (float ) + QK /2 ;
409
409
410
- uint8_t * restrict pd = (uint8_t * ) ( y + 0 * bs );
411
- uint8_t * restrict pb = (uint8_t * ) ( y + 0 * bs + sizeof (float ));
410
+ uint8_t * restrict pd = (( uint8_t * )y + 0 * bs );
411
+ uint8_t * restrict pb = (( uint8_t * )y + 0 * bs + sizeof (float ));
412
412
413
413
uint8_t pp [QK /2 ];
414
414
@@ -654,8 +654,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
654
654
const int nb = k / QK ;
655
655
const size_t bs = sizeof (float ) + QK /2 ;
656
656
657
- const uint8_t * restrict pd = (const uint8_t * ) ( x + 0 * bs );
658
- const uint8_t * restrict pb = (const uint8_t * ) ( x + 0 * bs + sizeof (float ));
657
+ const uint8_t * restrict pd = (( const uint8_t * )x + 0 * bs );
658
+ const uint8_t * restrict pb = (( const uint8_t * )x + 0 * bs + sizeof (float ));
659
659
660
660
// scalar
661
661
for (int i = 0 ; i < nb ; i ++ ) {
@@ -1301,11 +1301,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
1301
1301
1302
1302
const size_t bs = sizeof (float ) + QK /2 ;
1303
1303
1304
- const uint8_t * restrict pd0 = (const uint8_t * ) ( x + 0 * bs );
1305
- const uint8_t * restrict pd1 = (const uint8_t * ) ( y + 0 * bs );
1304
+ const uint8_t * restrict pd0 = (( const uint8_t * )x + 0 * bs );
1305
+ const uint8_t * restrict pd1 = (( const uint8_t * )y + 0 * bs );
1306
1306
1307
- const uint8_t * restrict pb0 = (const uint8_t * ) ( x + 0 * bs + sizeof (float ));
1308
- const uint8_t * restrict pb1 = (const uint8_t * ) ( y + 0 * bs + sizeof (float ));
1307
+ const uint8_t * restrict pb0 = (( const uint8_t * )x + 0 * bs + sizeof (float ));
1308
+ const uint8_t * restrict pb1 = (( const uint8_t * )y + 0 * bs + sizeof (float ));
1309
1309
1310
1310
float sumf = 0.0 ;
1311
1311
@@ -1731,8 +1731,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
1731
1731
const int nb = n / QK ;
1732
1732
const size_t bs = sizeof (float ) + QK /2 ;
1733
1733
1734
- const uint8_t * restrict pd = (const uint8_t * ) ( x + 0 * bs );
1735
- const uint8_t * restrict pb = (const uint8_t * ) ( x + 0 * bs + sizeof (float ));
1734
+ const uint8_t * restrict pd = (( const uint8_t * )x + 0 * bs );
1735
+ const uint8_t * restrict pb = (( const uint8_t * )x + 0 * bs + sizeof (float ));
1736
1736
1737
1737
#if __ARM_NEON
1738
1738
#if QK == 32
0 commit comments