From 636d56818a0d63a54acd7fd4added4343a204d92 Mon Sep 17 00:00:00 2001 From: Jay Krell Date: Sat, 11 Mar 2023 22:00:20 -0800 Subject: [PATCH 1/6] Port to Visual C++. - Combined nmake/Unix Makefile. - _alloca instead of variable size array. - Cast void* to char* for math. - C++20 for designated initializers. It builds. I haven't run it yet. --- Makefile | 69 ++++++++++++++++++++++++++++++++++++++++++++++--------- ggml.c | 20 ++++++++-------- main.cpp | 3 ++- utils.cpp | 14 ++++++++--- 4 files changed, 81 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index 8388c290d75ce..0887e99ae223a 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,47 @@ +# This one Makefile works with Microsoft nmake and Unix make. +# They use different conditional syntax, but each can be nested and inverted within the other. + +all: default + +ifdef MAKEDIR: +!ifdef MAKEDIR + +# Windows code. + +CXX = cl +# C++20 for designated initializers +# TODO: Detect AVX. +CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 +CC = $(CXX) +CFLAGS = $(CXXFLAGS) +LDFLAGS=/incremental:no +LINK_OUT = /out: +CC_OUT = /Fo +O = obj +EXE = .exe +RM_F = del 2>nul /f +UNIX_SPACE = +START_LINK_FLAGS = /link + +# The need for this is surprising but otherwise there is an access violation +# running main -h. +SLASH=^\ + +!else +else + +# Unix code. 
+ +O = o +EXE= +UNIX_SPACE = " " +UNIX_SPACE := $(UNIX_SPACE:"=) +LINK_OUT = -o$(UNIX_SPACE) +CC_OUT=-o$(UNIX_SPACE) +RM_F = rm -f +START_LINK_FLAGS= +SLASH=/ + ifndef UNAME_S UNAME_S := $(shell uname -s) endif @@ -171,28 +215,31 @@ $(info I LDFLAGS: $(LDFLAGS)) $(info I CC: $(CCV)) $(info I CXX: $(CXXV)) $(info ) + +endif +!endif : -default: main quantize +default: main$(EXE) quantize$(EXE) # # Build library # -ggml.o: ggml.c ggml.h - $(CC) $(CFLAGS) -c ggml.c -o ggml.o +ggml.$O: ggml.c ggml.h + $(CC) $(CFLAGS) -c ggml.c $(CC_OUT)ggml.$O -utils.o: utils.cpp utils.h - $(CXX) $(CXXFLAGS) -c utils.cpp -o utils.o +utils.$O: utils.cpp utils.h + $(CXX) $(CXXFLAGS) -c utils.cpp $(CC_OUT)utils.$O clean: - rm -f *.o main quantize + $(RM_F) *.$O main$(EXE) quantize$(EXE) -main: main.cpp ggml.o utils.o - $(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS) - ./main -h +main$(EXE): main.cpp ggml.$O utils.$O + $(CXX) $(CXXFLAGS) main.cpp $(START_LINK_FLAGS) ggml.$O utils.$O $(LINK_OUT)main$(EXE) $(LDFLAGS) + .$(SLASH)main$(EXE) -h -quantize: quantize.cpp ggml.o utils.o - $(CXX) $(CXXFLAGS) quantize.cpp ggml.o utils.o -o quantize $(LDFLAGS) +quantize$(EXE): quantize.cpp ggml.$O utils.$O + $(CXX) $(CXXFLAGS) quantize.cpp $(START_LINK_FLAGS) ggml.$O utils.$O $(LINK_OUT)quantize$(EXE) $(LDFLAGS) # # Tests diff --git a/ggml.c b/ggml.c index 71c30280b1066..3eb716c90165d 100644 --- a/ggml.c +++ b/ggml.c @@ -407,8 +407,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { const int nb = k / QK; const size_t bs = sizeof(float) + QK/2; - uint8_t * restrict pd = (uint8_t *) (y + 0*bs); - uint8_t * restrict pb = (uint8_t *) (y + 0*bs + sizeof(float)); + uint8_t * restrict pd = (uint8_t *) ((char*)y + 0*bs); + uint8_t * restrict pb = (uint8_t *) ((char*)y + 0*bs + sizeof(float)); uint8_t pp[QK/2]; @@ -654,8 +654,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) { const int nb = k / QK; const size_t bs = sizeof(float) + 
QK/2; - const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs); - const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float)); + const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs); + const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); // scalar for (int i = 0; i < nb; i++) { @@ -1301,11 +1301,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void const size_t bs = sizeof(float) + QK/2; - const uint8_t * restrict pd0 = (const uint8_t *) (x + 0*bs); - const uint8_t * restrict pd1 = (const uint8_t *) (y + 0*bs); + const uint8_t * restrict pd0 = (const uint8_t *) ((char*)x + 0*bs); + const uint8_t * restrict pd1 = (const uint8_t *) ((char*)y + 0*bs); - const uint8_t * restrict pb0 = (const uint8_t *) (x + 0*bs + sizeof(float)); - const uint8_t * restrict pb1 = (const uint8_t *) (y + 0*bs + sizeof(float)); + const uint8_t * restrict pb0 = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); + const uint8_t * restrict pb1 = (const uint8_t *) ((char*)y + 0*bs + sizeof(float)); float sumf = 0.0; @@ -1731,8 +1731,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res const int nb = n / QK; const size_t bs = sizeof(float) + QK/2; - const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs); - const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float)); + const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs); + const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); #if __ARM_NEON #if QK == 32 diff --git a/main.cpp b/main.cpp index 2f47480698f1e..3d9053362c751 100644 --- a/main.cpp +++ b/main.cpp @@ -732,7 +732,8 @@ bool llama_eval( return true; } -int main(int argc, char ** argv) { +int main(int argc, char ** argv) { + ggml_time_init(); const int64_t t_main_start_us = ggml_time_us(); gpt_params params; diff --git a/utils.cpp b/utils.cpp index abb34756ac026..01dd223cfea24 100644 --- a/utils.cpp 
+++ b/utils.cpp @@ -452,8 +452,12 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t const size_t row_size = nb*bs; assert(k % qk == 0); - - uint8_t pp[qk/2]; + +#if _MSC_VER + uint8_t* pp = (uint8_t*)_alloca(qk / 2); +#else + uint8_t pp[qk/2]; +#endif char * pdst = (char *) dst; @@ -507,7 +511,11 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t assert(k % qk == 0); - uint8_t pp[qk/2]; +#if _MSC_VER + uint8_t* pp = (uint8_t*)_alloca(qk / 2); +#else + uint8_t pp[qk/2]; +#endif char * pdst = (char *) dst; From a05225f194d9edce58dc18205d332490da7af9bf Mon Sep 17 00:00:00 2001 From: Jay Krell Date: Sat, 11 Mar 2023 22:27:38 -0800 Subject: [PATCH 2/6] LTCG and Win32 is Win32, not specific compilers. --- Makefile | 2 +- ggml.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0887e99ae223a..bd09c8cf88345 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ ifdef MAKEDIR: CXX = cl # C++20 for designated initializers # TODO: Detect AVX. -CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 +CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL CC = $(CXX) CFLAGS = $(CXXFLAGS) LDFLAGS=/incremental:no diff --git a/ggml.c b/ggml.c index 3eb716c90165d..d3e44b6e1dd0a 100644 --- a/ggml.c +++ b/ggml.c @@ -285,8 +285,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) { // timing // -#if defined(_MSC_VER) || defined(__MINGW32__) +#if _WIN32 static int64_t timer_freq; + void ggml_time_init(void) { LARGE_INTEGER frequency; QueryPerformanceFrequency(&frequency); From ec64cfa1db73108f4657412ca40f08968b2a360f Mon Sep 17 00:00:00 2001 From: Jay Krell Date: Sat, 11 Mar 2023 22:34:06 -0800 Subject: [PATCH 3/6] define NDEBUG, and simplify casts. 
--- Makefile | 2 +- ggml.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index bd09c8cf88345..42db7c04089dc 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ ifdef MAKEDIR: CXX = cl # C++20 for designated initializers # TODO: Detect AVX. -CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL +CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL /DNDEBUG CC = $(CXX) CFLAGS = $(CXXFLAGS) LDFLAGS=/incremental:no diff --git a/ggml.c b/ggml.c index d3e44b6e1dd0a..8288b997e6973 100644 --- a/ggml.c +++ b/ggml.c @@ -408,8 +408,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) { const int nb = k / QK; const size_t bs = sizeof(float) + QK/2; - uint8_t * restrict pd = (uint8_t *) ((char*)y + 0*bs); - uint8_t * restrict pb = (uint8_t *) ((char*)y + 0*bs + sizeof(float)); + uint8_t * restrict pd = (uint8_t *)y + 0*bs; + uint8_t * restrict pb = (uint8_t *)y + 0*bs + sizeof(float); uint8_t pp[QK/2]; @@ -655,8 +655,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) { const int nb = k / QK; const size_t bs = sizeof(float) + QK/2; - const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs); - const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); + const uint8_t * restrict pd = (const uint8_t *)x + 0*bs; + const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float); // scalar for (int i = 0; i < nb; i++) { @@ -1302,11 +1302,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void const size_t bs = sizeof(float) + QK/2; - const uint8_t * restrict pd0 = (const uint8_t *) ((char*)x + 0*bs); - const uint8_t * restrict pd1 = (const uint8_t *) ((char*)y + 0*bs); + const uint8_t * restrict pd0 = (const uint8_t *)x + 0*bs; + const uint8_t * restrict pd1 = (const uint8_t *)y + 0*bs; - const uint8_t * restrict pb0 = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); - const 
uint8_t * restrict pb1 = (const uint8_t *) ((char*)y + 0*bs + sizeof(float)); + const uint8_t * restrict pb0 = (const uint8_t *)x + 0*bs + sizeof(float); + const uint8_t * restrict pb1 = (const uint8_t *)y + 0*bs + sizeof(float); float sumf = 0.0; @@ -1732,8 +1732,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res const int nb = n / QK; const size_t bs = sizeof(float) + QK/2; - const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs); - const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float)); + const uint8_t * restrict pd = (const uint8_t *)x + 0*bs; + const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float); #if __ARM_NEON #if QK == 32 From 3225d9b8a06a2c8b0e6cf3e7f752badc06591d89 Mon Sep 17 00:00:00 2001 From: Jay Krell Date: Sat, 11 Mar 2023 22:36:05 -0800 Subject: [PATCH 4/6] nologo --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 42db7c04089dc..30bbf109f40f1 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ ifdef MAKEDIR: CXX = cl # C++20 for designated initializers # TODO: Detect AVX. -CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL /DNDEBUG +CXXFLAGS = /nologo /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL /DNDEBUG CC = $(CXX) CFLAGS = $(CXXFLAGS) LDFLAGS=/incremental:no From bea4af5c9ce5f000f8c2dcdc6c64394567277578 Mon Sep 17 00:00:00 2001 From: Jay Krell Date: Sat, 11 Mar 2023 22:45:08 -0800 Subject: [PATCH 5/6] Fix sizeof(pp). 
--- utils.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/utils.cpp b/utils.cpp index 01dd223cfea24..bbebfb727b629 100644 --- a/utils.cpp +++ b/utils.cpp @@ -453,10 +453,11 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t assert(k % qk == 0); + size_t const pp_size = (qk / 2); #if _MSC_VER - uint8_t* pp = (uint8_t*)_alloca(qk / 2); + uint8_t* pp = (uint8_t*)_alloca(pp_size); #else - uint8_t pp[qk/2]; + uint8_t pp[pp_size]; #endif char * pdst = (char *) dst; @@ -496,7 +497,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t pp[l/2] = vi0 | (vi1 << 4); } - memcpy(pb, pp, sizeof(pp)); + memcpy(pb, pp, pp_size); pb += bs; } } @@ -511,10 +512,11 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t assert(k % qk == 0); + size_t const pp_size = (qk / 2); #if _MSC_VER - uint8_t* pp = (uint8_t*)_alloca(qk / 2); + uint8_t* pp = (uint8_t*)_alloca(pp_size); #else - uint8_t pp[qk/2]; + uint8_t pp[pp_size]; #endif char * pdst = (char *) dst; @@ -559,7 +561,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t pp[l/2] = vi0 | (vi1 << 4); } - memcpy(pb + i*qk/2, pp, sizeof(pp)); + memcpy(pb + i*qk/2, pp, pp_size); } } } From c2201a9a833ab0eb3b0197c5a13612df04726833 Mon Sep 17 00:00:00 2001 From: Jay Krell Date: Sat, 11 Mar 2023 22:55:17 -0800 Subject: [PATCH 6/6] Add comment. --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 30bbf109f40f1..e646541347de4 100644 --- a/Makefile +++ b/Makefile @@ -218,6 +218,8 @@ $(info ) endif !endif : + +# Common Unix/Windows code. default: main$(EXE) quantize$(EXE)