From de69ea86b0bab6ba4397d74db2940157aa875843 Mon Sep 17 00:00:00 2001
From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
Date: Thu, 8 Feb 2024 16:42:14 +0530
Subject: [PATCH 1/5] Fix ggml_cpy_f32_f16_sycl call arguments in ggml_sycl_op_dequantize_mul_mat_vec

---
 ggml-sycl.cpp | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index a03df4c654303..e430643640e9a 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -12149,6 +12149,25 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
     const dpct::queue_ptr &stream) {
 
     const int64_t ne00 = src0->ne[0];
+    const int64_t ne01 = src0->ne[1];
+    const int64_t ne02 = src0->ne[2];
+
+
+    const int64_t nb00 = src0->nb[0];
+    const int64_t nb01 = src0->nb[1];
+    const int64_t nb02 = src0->nb[2];
+    const int64_t nb03 = src0->nb[3];
+
+    const int64_t ne10 = src1->ne[0];
+    const int64_t ne11 = src1->ne[1];
+    const int64_t ne12 = src1->ne[2];
+
+
+    const int64_t nb10 = src1->nb[0];
+    const int64_t nb11 = src1->nb[1];
+    const int64_t nb12 = src1->nb[2];
+    const int64_t nb13 = src1->nb[3];
+                        
     const int64_t row_diff = row_high - row_low;
 
     // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
@@ -12166,9 +12185,13 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
             src1_dfloat = (sycl::half *)src1->data + src1_padded_row_size;
         } else {
             src1_dfloat = src1_dfloat_a.alloc(ne00);
+            //ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
+            //                      ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
+            //                      sizeof(sycl::half), 0, 0, stream);
             ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
-                                  ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
-                                  sizeof(sycl::half), 0, 0, stream);
+                                   ne00, ne00, ne01, ne02, nb00, nb01, nb02,
+                                   nb03, ne10, ne11, ne12, nb10, nb11, nb12,
+                                   nb13, stream);
         }
     }
 #else

From c4c32f295458621e8d0273f59e15d39017be11da Mon Sep 17 00:00:00 2001
From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
Date: Thu, 8 Feb 2024 17:26:27 +0530
Subject: [PATCH 2/5] Remove commented-out ggml_cpy_f32_f16_sycl call

---
 ggml-sycl.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index e430643640e9a..12fc0cf72ff04 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -12185,9 +12185,6 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
             src1_dfloat = (sycl::half *)src1->data + src1_padded_row_size;
         } else {
             src1_dfloat = src1_dfloat_a.alloc(ne00);
-            //ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
-            //                      ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
-            //                      sizeof(sycl::half), 0, 0, stream);
             ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
                                    ne00, ne00, ne01, ne02, nb00, nb01, nb02,
                                    nb03, ne10, ne11, ne12, nb10, nb11, nb12,

From 6b40e5ac82169441b3cdf7830390add5917d2987 Mon Sep 17 00:00:00 2001
From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
Date: Thu, 8 Feb 2024 17:37:49 +0530
Subject: [PATCH 3/5] Add SYCL FP16 (LLAMA_SYCL_F16=ON) build job to CI

---
 .github/workflows/build.yml | 41 +++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f4c374ce5c639..166d31aa7d6d6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -183,6 +183,47 @@ jobs:
           cd build
           cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
           cmake --build . --config Release -j $(nproc)
+  
+  ubuntu-22-cmake-sycl-fp16:
+    runs-on: ubuntu-22.04
+
+    continue-on-error: true
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: add oneAPI to apt
+        shell: bash
+        run: |
+          cd /tmp
+          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+      - name: install oneAPI dpcpp compiler
+        shell: bash
+        run: |
+          sudo apt update
+          sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+      - name: install oneAPI MKL library
+        shell: bash
+        run: |
+          sudo apt install intel-oneapi-mkl-devel
+
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Build
+        id: cmake_build
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          mkdir build
+          cd build
+          cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON ..
+          cmake --build . --config Release -j $(nproc)
 
   # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
   #       how to debug it.

From 6bf368e7bdea9aa8564d5026cafe0ccb4fd379b1 Mon Sep 17 00:00:00 2001
From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
Date: Thu, 8 Feb 2024 19:00:10 +0530
Subject: [PATCH 4/5] Replace hand-written ne/nb locals with GGML_TENSOR_BINARY_OP_LOCALS macro

---
 ggml-sycl.cpp | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index 12fc0cf72ff04..dd562a89828eb 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -12148,26 +12148,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
     const int64_t src1_ncols, const int64_t src1_padded_row_size,
     const dpct::queue_ptr &stream) {
 
-    const int64_t ne00 = src0->ne[0];
-    const int64_t ne01 = src0->ne[1];
-    const int64_t ne02 = src0->ne[2];
-
-
-    const int64_t nb00 = src0->nb[0];
-    const int64_t nb01 = src0->nb[1];
-    const int64_t nb02 = src0->nb[2];
-    const int64_t nb03 = src0->nb[3];
-
-    const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
-    const int64_t ne12 = src1->ne[2];
-
+    GGML_TENSOR_BINARY_OP_LOCALS
 
-    const int64_t nb10 = src1->nb[0];
-    const int64_t nb11 = src1->nb[1];
-    const int64_t nb12 = src1->nb[2];
-    const int64_t nb13 = src1->nb[3];
-                        
     const int64_t row_diff = row_high - row_low;
 
     // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
@@ -12186,9 +12168,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
         } else {
             src1_dfloat = src1_dfloat_a.alloc(ne00);
             ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
-                                   ne00, ne00, ne01, ne02, nb00, nb01, nb02,
-                                   nb03, ne10, ne11, ne12, nb10, nb11, nb12,
-                                   nb13, stream);
+                                  ne00, ne00, ne01, ne02, nb00, nb01, nb02,
+                                  nb03, ne10, ne11, ne12, nb10, nb11, nb12,
+                                  nb13, stream);
         }
     }
 #else

From 7320059891d6c506cae5f8b30e9be3175b8f5413 Mon Sep 17 00:00:00 2001
From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
Date: Thu, 8 Feb 2024 21:48:52 +0530
Subject: [PATCH 5/5] Remove trailing whitespace in build.yml

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 166d31aa7d6d6..ed292d6b8935d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -183,7 +183,7 @@ jobs:
           cd build
           cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
           cmake --build . --config Release -j $(nproc)
-  
+
   ubuntu-22-cmake-sycl-fp16:
     runs-on: ubuntu-22.04