From de69ea86b0bab6ba4397d74db2940157aa875843 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Thu, 8 Feb 2024 16:42:14 +0530 Subject: [PATCH 1/5] fix f16_sycl cpy call --- ggml-sycl.cpp | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index a03df4c654303..e430643640e9a 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -12149,6 +12149,25 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( const dpct::queue_ptr &stream) { const int64_t ne00 = src0->ne[0]; + const int64_t ne01 = src0->ne[1]; + const int64_t ne02 = src0->ne[2]; + + + const int64_t nb00 = src0->nb[0]; + const int64_t nb01 = src0->nb[1]; + const int64_t nb02 = src0->nb[2]; + const int64_t nb03 = src0->nb[3]; + + const int64_t ne10 = src1->ne[0]; + const int64_t ne11 = src1->ne[1]; + const int64_t ne12 = src1->ne[2]; + + + const int64_t nb10 = src1->nb[0]; + const int64_t nb11 = src1->nb[1]; + const int64_t nb12 = src1->nb[2]; + const int64_t nb13 = src1->nb[3]; + const int64_t row_diff = row_high - row_low; // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics @@ -12166,9 +12185,13 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( src1_dfloat = (sycl::half *)src1->data + src1_padded_row_size; } else { src1_dfloat = src1_dfloat_a.alloc(ne00); + //ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat, + // ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1, + // sizeof(sycl::half), 0, 0, stream); ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat, - ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1, - sizeof(sycl::half), 0, 0, stream); + ne00, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, + nb13, stream); } } #else From c4c32f295458621e8d0273f59e15d39017be11da Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:26:27 +0530 Subject: [PATCH 2/5] rm old logic --- ggml-sycl.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index e430643640e9a..12fc0cf72ff04 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -12185,9 +12185,6 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( src1_dfloat = (sycl::half *)src1->data + src1_padded_row_size; } else { src1_dfloat = src1_dfloat_a.alloc(ne00); - //ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat, - // ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1, - // sizeof(sycl::half), 0, 0, stream); ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat, ne00, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, From 6b40e5ac82169441b3cdf7830390add5917d2987 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:37:49 +0530 Subject: [PATCH 3/5] add fp16 build CI --- .github/workflows/build.yml | 41 +++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f4c374ce5c639..166d31aa7d6d6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -183,6 +183,47 @@ jobs: cd build cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. cmake --build . --config Release -j $(nproc) + + ubuntu-22-cmake-sycl-fp16: + runs-on: ubuntu-22.04 + + continue-on-error: true + + steps: + - uses: actions/checkout@v2 + + - name: add oneAPI to apt + shell: bash + run: | + cd /tmp + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + + - name: install oneAPI dpcpp compiler + shell: bash + run: | + sudo apt update + sudo apt install intel-oneapi-compiler-dpcpp-cpp + + - name: install oneAPI MKL library + shell: bash + run: | + sudo apt install intel-oneapi-mkl-devel + + - name: Clone + id: checkout + uses: actions/checkout@v3 + + - name: Build + id: cmake_build + run: | + source /opt/intel/oneapi/setvars.sh + mkdir build + cd build + cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON .. + cmake --build . --config Release -j $(nproc) # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know # how to debug it. From 6bf368e7bdea9aa8564d5026cafe0ccb4fd379b1 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Thu, 8 Feb 2024 19:00:10 +0530 Subject: [PATCH 4/5] use macro --- ggml-sycl.cpp | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 12fc0cf72ff04..dd562a89828eb 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -12148,26 +12148,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( const int64_t src1_ncols, const int64_t src1_padded_row_size, const dpct::queue_ptr &stream) { - const int64_t ne00 = src0->ne[0]; - const int64_t ne01 = src0->ne[1]; - const int64_t ne02 = src0->ne[2]; - - - const int64_t nb00 = src0->nb[0]; - const int64_t nb01 = src0->nb[1]; - const int64_t nb02 = src0->nb[2]; - const int64_t nb03 = src0->nb[3]; - - const int64_t ne10 = src1->ne[0]; - const int64_t ne11 = src1->ne[1]; - const int64_t ne12 = src1->ne[2]; - + GGML_TENSOR_BINARY_OP_LOCALS - const int64_t nb10 = src1->nb[0]; - const int64_t nb11 = src1->nb[1]; - const int64_t nb12 = src1->nb[2]; - const int64_t nb13 = src1->nb[3]; - const int64_t row_diff = row_high - row_low; // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics @@ -12186,9 +12168,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( } else { src1_dfloat = src1_dfloat_a.alloc(ne00); ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat, - ne00, ne00, ne01, ne02, nb00, nb01, nb02, - nb03, ne10, ne11, ne12, nb10, nb11, nb12, - nb13, stream); + ne00, ne00, ne01, ne02, nb00, nb01, nb02, + nb03, ne10, ne11, ne12, nb10, nb11, nb12, + nb13, stream); } } #else From 7320059891d6c506cae5f8b30e9be3175b8f5413 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Thu, 8 Feb 2024 21:48:52 +0530 Subject: [PATCH 5/5] format fix --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 166d31aa7d6d6..ed292d6b8935d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -183,7 +183,7 @@ jobs: cd build cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. cmake --build . --config Release -j $(nproc) - + ubuntu-22-cmake-sycl-fp16: runs-on: ubuntu-22.04