From 636d56818a0d63a54acd7fd4added4343a204d92 Mon Sep 17 00:00:00 2001
From: Jay Krell <jay.krell@cornell.edu>
Date: Sat, 11 Mar 2023 22:00:20 -0800
Subject: [PATCH 1/6] Port to Visual C++.

- Combined nmake/Unix Makefile.
- _alloca instead of variable size array.
- Cast void* to char* for math.
- C++20 for designated initializers.

It builds. I haven't run it yet.
---
 Makefile  | 69 ++++++++++++++++++++++++++++++++++++++++++++++---------
 ggml.c    | 20 ++++++++--------
 main.cpp  |  3 ++-
 utils.cpp | 14 ++++++++---
 4 files changed, 81 insertions(+), 25 deletions(-)

diff --git a/Makefile b/Makefile
index 8388c290d75ce..0887e99ae223a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,47 @@
+# This one Makefile works with Microsoft nmake and Unix make.
+# They use different conditional syntax, but each can be nested and inverted within the other.
+
+all: default
+
+ifdef MAKEDIR:
+!ifdef MAKEDIR
+
+# Windows code.
+
+CXX = cl
+# C++20 for designated initializers
+# TODO: Detect AVX.
+CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20
+CC = $(CXX)
+CFLAGS = $(CXXFLAGS)
+LDFLAGS=/incremental:no
+LINK_OUT = /out:
+CC_OUT = /Fo
+O = obj
+EXE = .exe
+RM_F = del 2>nul /f
+UNIX_SPACE =
+START_LINK_FLAGS = /link
+
+# The need for this is surprising but otherwise there is an access violation
+# running main -h.
+SLASH=^\
+
+!else
+else
+
+# Unix code.
+
+O = o
+EXE=
+UNIX_SPACE = " "
+UNIX_SPACE := $(UNIX_SPACE:"=)
+LINK_OUT = -o$(UNIX_SPACE)
+CC_OUT=-o$(UNIX_SPACE)
+RM_F = rm -f
+START_LINK_FLAGS=
+SLASH=/
+
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif
@@ -171,28 +215,31 @@ $(info I LDFLAGS:  $(LDFLAGS))
 $(info I CC:       $(CCV))
 $(info I CXX:      $(CXXV))
 $(info )
+
+endif
+!endif :
 
-default: main quantize
+default: main$(EXE) quantize$(EXE)
 
 #
 # Build library
 #
 
-ggml.o: ggml.c ggml.h
-	$(CC)  $(CFLAGS)   -c ggml.c -o ggml.o
+ggml.$O: ggml.c ggml.h
+	$(CC)  $(CFLAGS) -c ggml.c $(CC_OUT)ggml.$O
 
-utils.o: utils.cpp utils.h
-	$(CXX) $(CXXFLAGS) -c utils.cpp -o utils.o
+utils.$O: utils.cpp utils.h
+	$(CXX) $(CXXFLAGS) -c utils.cpp $(CC_OUT)utils.$O
 
 clean:
-	rm -f *.o main quantize
+	$(RM_F) *.$O main$(EXE) quantize$(EXE)
 
-main: main.cpp ggml.o utils.o
-	$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS)
-	./main -h
+main$(EXE): main.cpp ggml.$O utils.$O
+	$(CXX) $(CXXFLAGS) main.cpp $(START_LINK_FLAGS) ggml.$O utils.$O $(LINK_OUT)main$(EXE) $(LDFLAGS)
+	.$(SLASH)main$(EXE) -h
 
-quantize: quantize.cpp ggml.o utils.o
-	$(CXX) $(CXXFLAGS) quantize.cpp ggml.o utils.o -o quantize $(LDFLAGS)
+quantize$(EXE): quantize.cpp ggml.$O utils.$O
+	$(CXX) $(CXXFLAGS) quantize.cpp $(START_LINK_FLAGS) ggml.$O utils.$O $(LINK_OUT)quantize$(EXE) $(LDFLAGS)
 
 #
 # Tests
diff --git a/ggml.c b/ggml.c
index 71c30280b1066..3eb716c90165d 100644
--- a/ggml.c
+++ b/ggml.c
@@ -407,8 +407,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
     const int nb = k / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    uint8_t * restrict pd = (uint8_t *) (y + 0*bs);
-    uint8_t * restrict pb = (uint8_t *) (y + 0*bs + sizeof(float));
+    uint8_t * restrict pd = (uint8_t *) ((char*)y + 0*bs);
+    uint8_t * restrict pb = (uint8_t *) ((char*)y + 0*bs + sizeof(float));
 
     uint8_t pp[QK/2];
 
@@ -654,8 +654,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
     const int nb = k / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
+    const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs);
+    const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float));
 
     // scalar
     for (int i = 0; i < nb; i++) {
@@ -1301,11 +1301,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
 
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd0 = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pd1 = (const uint8_t *) (y + 0*bs);
+    const uint8_t * restrict pd0 = (const uint8_t *) ((char*)x + 0*bs);
+    const uint8_t * restrict pd1 = (const uint8_t *) ((char*)y + 0*bs);
 
-    const uint8_t * restrict pb0 = (const uint8_t *) (x + 0*bs + sizeof(float));
-    const uint8_t * restrict pb1 = (const uint8_t *) (y + 0*bs + sizeof(float));
+    const uint8_t * restrict pb0 = (const uint8_t *) ((char*)x + 0*bs + sizeof(float));
+    const uint8_t * restrict pb1 = (const uint8_t *) ((char*)y + 0*bs + sizeof(float));
 
     float sumf = 0.0;
 
@@ -1731,8 +1731,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
     const int nb = n / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
-    const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
+    const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs);
+    const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float));
 
 #if __ARM_NEON
 #if QK == 32
diff --git a/main.cpp b/main.cpp
index 2f47480698f1e..3d9053362c751 100644
--- a/main.cpp
+++ b/main.cpp
@@ -732,7 +732,8 @@ bool llama_eval(
     return true;
 }
 
-int main(int argc, char ** argv) {
+int main(int argc, char ** argv) {
+    ggml_time_init();
     const int64_t t_main_start_us = ggml_time_us();
 
     gpt_params params;
diff --git a/utils.cpp b/utils.cpp
index abb34756ac026..01dd223cfea24 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -452,8 +452,12 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
     const size_t row_size = nb*bs;
 
     assert(k % qk == 0);
-
-    uint8_t pp[qk/2];
+
+#if _MSC_VER
+    uint8_t* pp = (uint8_t*)_alloca(qk / 2);
+#else
+    uint8_t pp[qk/2];
+#endif
 
     char * pdst = (char *) dst;
 
@@ -507,7 +511,11 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
-    uint8_t pp[qk/2];
+#if _MSC_VER
+    uint8_t* pp = (uint8_t*)_alloca(qk / 2);
+#else
+    uint8_t pp[qk/2];
+#endif
 
     char * pdst = (char *) dst;
 

From a05225f194d9edce58dc18205d332490da7af9bf Mon Sep 17 00:00:00 2001
From: Jay Krell <jay.krell@cornell.edu>
Date: Sat, 11 Mar 2023 22:27:38 -0800
Subject: [PATCH 2/6] LTCG and Win32 is Win32, not specific compilers.

---
 Makefile | 2 +-
 ggml.c   | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 0887e99ae223a..bd09c8cf88345 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ ifdef MAKEDIR:
 CXX = cl
 # C++20 for designated initializers
 # TODO: Detect AVX.
-CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20
+CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL
 CC = $(CXX)
 CFLAGS = $(CXXFLAGS)
 LDFLAGS=/incremental:no
diff --git a/ggml.c b/ggml.c
index 3eb716c90165d..d3e44b6e1dd0a 100644
--- a/ggml.c
+++ b/ggml.c
@@ -285,8 +285,9 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
 // timing
 //
 
-#if defined(_MSC_VER) || defined(__MINGW32__)
+#if _WIN32
 static int64_t timer_freq;
+
 void ggml_time_init(void) {
     LARGE_INTEGER frequency;
     QueryPerformanceFrequency(&frequency);

From ec64cfa1db73108f4657412ca40f08968b2a360f Mon Sep 17 00:00:00 2001
From: Jay Krell <jay.krell@cornell.edu>
Date: Sat, 11 Mar 2023 22:34:06 -0800
Subject: [PATCH 3/6] define NDEBUG, and simplify casts.

---
 Makefile |  2 +-
 ggml.c   | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index bd09c8cf88345..42db7c04089dc 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ ifdef MAKEDIR:
 CXX = cl
 # C++20 for designated initializers
 # TODO: Detect AVX.
-CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL
+CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL /DNDEBUG
 CC = $(CXX)
 CFLAGS = $(CXXFLAGS)
 LDFLAGS=/incremental:no
diff --git a/ggml.c b/ggml.c
index d3e44b6e1dd0a..8288b997e6973 100644
--- a/ggml.c
+++ b/ggml.c
@@ -408,8 +408,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
     const int nb = k / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    uint8_t * restrict pd = (uint8_t *) ((char*)y + 0*bs);
-    uint8_t * restrict pb = (uint8_t *) ((char*)y + 0*bs + sizeof(float));
+    uint8_t * restrict pd = (uint8_t *)y + 0*bs;
+    uint8_t * restrict pb = (uint8_t *)y + 0*bs + sizeof(float);
 
     uint8_t pp[QK/2];
 
@@ -655,8 +655,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
     const int nb = k / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs);
-    const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float));
+    const uint8_t * restrict pd = (const uint8_t *)x + 0*bs;
+    const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float);
 
     // scalar
     for (int i = 0; i < nb; i++) {
@@ -1302,11 +1302,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
 
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd0 = (const uint8_t *) ((char*)x + 0*bs);
-    const uint8_t * restrict pd1 = (const uint8_t *) ((char*)y + 0*bs);
+    const uint8_t * restrict pd0 = (const uint8_t *)x + 0*bs;
+    const uint8_t * restrict pd1 = (const uint8_t *)y + 0*bs;
 
-    const uint8_t * restrict pb0 = (const uint8_t *) ((char*)x + 0*bs + sizeof(float));
-    const uint8_t * restrict pb1 = (const uint8_t *) ((char*)y + 0*bs + sizeof(float));
+    const uint8_t * restrict pb0 = (const uint8_t *)x + 0*bs + sizeof(float);
+    const uint8_t * restrict pb1 = (const uint8_t *)y + 0*bs + sizeof(float);
 
     float sumf = 0.0;
 
@@ -1732,8 +1732,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
     const int nb = n / QK;
     const size_t bs = sizeof(float) + QK/2;
 
-    const uint8_t * restrict pd = (const uint8_t *) ((char*)x + 0*bs);
-    const uint8_t * restrict pb = (const uint8_t *) ((char*)x + 0*bs + sizeof(float));
+    const uint8_t * restrict pd = (const uint8_t *)x + 0*bs;
+    const uint8_t * restrict pb = (const uint8_t *)x + 0*bs + sizeof(float);
 
 #if __ARM_NEON
 #if QK == 32

From 3225d9b8a06a2c8b0e6cf3e7f752badc06591d89 Mon Sep 17 00:00:00 2001
From: Jay Krell <jay.krell@cornell.edu>
Date: Sat, 11 Mar 2023 22:36:05 -0800
Subject: [PATCH 4/6] nologo

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 42db7c04089dc..30bbf109f40f1 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ ifdef MAKEDIR:
 CXX = cl
 # C++20 for designated initializers
 # TODO: Detect AVX.
-CXXFLAGS = /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL /DNDEBUG
+CXXFLAGS = /nologo /MD /Gy /Z7 /EHsc /O2 /arch:AVX2 /std:c++20 /GL /DNDEBUG
 CC = $(CXX)
 CFLAGS = $(CXXFLAGS)
 LDFLAGS=/incremental:no

From bea4af5c9ce5f000f8c2dcdc6c64394567277578 Mon Sep 17 00:00:00 2001
From: Jay Krell <jay.krell@cornell.edu>
Date: Sat, 11 Mar 2023 22:45:08 -0800
Subject: [PATCH 5/6] Fix sizeof(pp).

---
 utils.cpp | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/utils.cpp b/utils.cpp
index 01dd223cfea24..bbebfb727b629 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -453,10 +453,11 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
+    size_t const pp_size = (qk / 2);
 #if _MSC_VER
-    uint8_t* pp = (uint8_t*)_alloca(qk / 2);
+    uint8_t* pp = (uint8_t*)_alloca(pp_size);
 #else
-    uint8_t pp[qk/2];
+    uint8_t pp[pp_size];
 #endif
 
     char * pdst = (char *) dst;
@@ -496,7 +497,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
                     pp[l/2] = vi0 | (vi1 << 4);
                 }
 
-                memcpy(pb, pp, sizeof(pp));
+                memcpy(pb, pp, pp_size);
                 pb += bs;
             }
         }
@@ -511,10 +512,11 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
 
     assert(k % qk == 0);
 
+    size_t const pp_size = (qk / 2);
 #if _MSC_VER
-    uint8_t* pp = (uint8_t*)_alloca(qk / 2);
+    uint8_t* pp = (uint8_t*)_alloca(pp_size);
 #else
-    uint8_t pp[qk/2];
+    uint8_t pp[pp_size];
 #endif
 
     char * pdst = (char *) dst;
@@ -559,7 +561,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
                     pp[l/2] = vi0 | (vi1 << 4);
                 }
 
-                memcpy(pb + i*qk/2, pp, sizeof(pp));
+                memcpy(pb + i*qk/2, pp, pp_size);
             }
         }
     }

From c2201a9a833ab0eb3b0197c5a13612df04726833 Mon Sep 17 00:00:00 2001
From: Jay Krell <jay.krell@cornell.edu>
Date: Sat, 11 Mar 2023 22:55:17 -0800
Subject: [PATCH 6/6] Add comment.

---
 Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Makefile b/Makefile
index 30bbf109f40f1..e646541347de4 100644
--- a/Makefile
+++ b/Makefile
@@ -218,6 +218,8 @@ $(info )
 
 endif
 !endif :
+
+# Common Unix/Windows code.
 
 default: main$(EXE) quantize$(EXE)