
Commit 4913d0d

abhilash1910 authored and iThalay committed

whisper : add SYCL support (ggml-org#1863)

* add changes from llama upstream
* add sycl abstraction
* add sycl build
* update cmake
* add sycl build config
* fix bug
* fix bug
* refactor build
* fix bug
* update build
* call build
* use sycl header
* add examples
* add target
* fix typecast in quant.c
* readd fp16 and readme
* fix quant typecast
* add sample
* add readme
* remove cxx file check

1 parent 5fd86e3 · commit 4913d0d

File tree

9 files changed (+410, -6 lines)


CMakeLists.txt

Lines changed: 41 additions & 6 deletions
@@ -70,12 +70,14 @@ if (APPLE)
     option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
     option(WHISPER_METAL_EMBED_LIBRARY   "whisper: embed Metal library"       OFF)
 else()
-    option(WHISPER_BLAS                  "whisper: use BLAS libraries"        OFF)
-    option(WHISPER_BLAS_VENDOR           "whisper: BLAS library vendor"       Generic)
-    option(WHISPER_OPENBLAS              "whisper: prefer OpenBLAS"           OFF)
-    option(WHISPER_CUBLAS                "whisper: support for cuBLAS"        OFF)
-    option(WHISPER_HIPBLAS               "whisper: support for hipBLAS"       OFF)
-    option(WHISPER_CLBLAST               "whisper: use CLBlast"               OFF)
+    option(WHISPER_BLAS                  "whisper: use BLAS libraries"        OFF)
+    option(WHISPER_BLAS_VENDOR           "whisper: BLAS library vendor"       Generic)
+    option(WHISPER_OPENBLAS              "whisper: prefer OpenBLAS"           OFF)
+    option(WHISPER_CUBLAS                "whisper: support for cuBLAS"        OFF)
+    option(WHISPER_HIPBLAS               "whisper: support for hipBLAS"       OFF)
+    option(WHISPER_CLBLAST               "whisper: use CLBlast"               OFF)
+    option(WHISPER_SYCL                  "whisper: use SYCL"                  OFF)
+    option(WHISPER_SYCL_F16              "whisper: use 16 bit floats for sycl calculations" OFF)
 endif()

 option(WHISPER_PERF "whisper: enable perf timings" OFF)
@@ -106,6 +108,13 @@ endif()

 find_package(Threads REQUIRED)

+#compile flag sycl
+if (WHISPER_SYCL)
+    set(CMAKE_CXX_STANDARD 17)
+else()
+    set(CMAKE_CXX_STANDARD 11)
+endif()
+
 # on APPLE
 if (APPLE)
     # include Accelerate framework
@@ -309,6 +318,30 @@ if( WHISPER_OPENVINO )
     find_package(OpenVINO REQUIRED COMPONENTS Runtime)
 endif()

+if (WHISPER_SYCL)
+    if ( NOT DEFINED ENV{ONEAPI_ROOT})
+        message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
+    endif()
+    #todo: AOT
+
+    find_package(IntelSYCL REQUIRED)
+    if (WHISPER_SYCL_F16)
+        add_compile_definitions(GGML_SYCL_F16)
+    endif()
+    add_compile_definitions(GGML_USE_SYCL)
+
+    add_compile_options(-I./) #include DPCT
+    add_compile_options(-I/${SYCL_INCLUDE_DIR})
+
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
+
+    set(GGML_HEADERS_SYCL ggml-sycl.h)
+    set(GGML_SOURCES_SYCL ggml-sycl.cpp)
+
+    set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
+endif()
 # compiler flags

 if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
@@ -503,6 +536,8 @@ add_library(${TARGET}
     ${GGML_SOURCES_METAL}
     ${GGML_SOURCES_CUDA}
     ${GGML_SOURCES_OPENCL}
+    ${GGML_SOURCES_SYCL}
+    ${GGML_HEADERS_SYCL}
     whisper.h
     whisper.cpp
     )
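Note: the new options are driven from the CMake command line. A minimal configure-and-build sequence, mirroring the README_sycl.md added in this commit (all flags and paths are taken from that README, not from additional commit content):

```
source /opt/intel/oneapi/setvars.sh
mkdir -p build && cd build

# FP32 build (add -DWHISPER_SYCL_F16=ON for FP16)
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake --build . --config Release -v
```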

README_sycl.md

Lines changed: 249 additions & 0 deletions
@@ -0,0 +1,249 @@
# whisper.cpp for SYCL

[Background](#background)

[OS](#os)

[Intel GPU](#intel-gpu)

[Linux](#linux)

[Environment Variable](#environment-variable)

[Known Issue](#known-issue)

[Todo](#todo)

## Background

SYCL is a higher-level programming model designed to improve programming productivity on various hardware accelerators such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.

oneAPI is an open, standards-based specification that supports multiple architecture types, including but not limited to GPUs, CPUs, and FPGAs. The spec covers both direct programming and API-based programming paradigms.

Intel uses SYCL as its direct programming language to target CPUs, GPUs, and FPGAs.

To avoid re-inventing the wheel, this code follows the other accelerator code paths in llama.cpp (such as OpenBLAS, cuBLAS, and CLBlast). The open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used to migrate the code to SYCL.

whisper.cpp for SYCL is intended to support Intel GPUs.

For Intel CPUs, the regular x86 build of whisper.cpp (with Intel MKL) is recommended instead.

## OS

|OS|Status|Verified|
|-|-|-|
|Linux|Support|Ubuntu 22.04|
|Windows|Ongoing| |

## Intel GPU

|Intel GPU|Status|Verified Model|
|-|-|-|
|Intel Data Center Max Series|Support|Max 1550|
|Intel Data Center Flex Series|Support|Flex 170|
|Intel Arc Series|Support|Arc 770|
|Intel built-in Arc GPU|Support|built-in Arc GPU in Meteor Lake|
|Intel iGPU|Support|iGPU in i5-1250P, i7-1165G7|

## Linux

### Setup Environment

1. Install the Intel GPU driver.

a. Install the Intel GPU driver following the official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).

Note: for an iGPU, install the client GPU driver.

b. Add your user to the render and video groups:

```
sudo usermod -aG render username
sudo usermod -aG video username
```

Note: log out and back in for the group change to take effect.

c. Check:

```
sudo apt install clinfo
sudo clinfo -l
```

Output (example):

```
Platform #0: Intel(R) OpenCL Graphics
 `-- Device #0: Intel(R) Arc(TM) A770 Graphics


Platform #0: Intel(R) OpenCL HD Graphics
 `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
```

2. Install the Intel® oneAPI Base Toolkit.

a. Follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).

Installing to the default folder **/opt/intel/oneapi** is recommended.

The rest of this guide uses the default folder as the example. If you installed to another folder, adjust the paths below accordingly.

b. Check:

```
source /opt/intel/oneapi/setvars.sh

sycl-ls
```

There should be one or more Level Zero devices listed, like **[ext_oneapi_level_zero:gpu:0]**.

Output (example):
```
[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]

```

3. Build locally:

```
mkdir -p build
cd build
source /opt/intel/oneapi/setvars.sh

#for FP16
#cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON

#for FP32
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

#build example/main only
#cmake --build . --config Release --target main

#build all binary
cmake --build . --config Release -v

```

or

```
./examples/sycl/build.sh
```

Note:

- By default, this builds all binaries, which takes longer. To save time, we recommend building only **example/main**.

### Run

1. Put the model file into the **models** folder.

2. Enable the oneAPI running environment:

```
source /opt/intel/oneapi/setvars.sh
```

3. List the device IDs.

Run without parameters:

```
./build/bin/ls-sycl-device

or

./build/bin/main
```

Check the IDs in the startup log, like:

```
found 4 SYCL devices:
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
    max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
    max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136

```

|Attribute|Note|
|-|-|
|compute capability 1.3|Level-zero runtime, recommended|
|compute capability 3.0|OpenCL runtime, slower than Level-zero in most cases|

4. Set the device ID and run whisper.cpp.

Set device ID = 0 with **GGML_SYCL_DEVICE=0**:

```
GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
```
or run by script:

```
./examples/sycl/run_whisper.sh
```

5. Check the device ID in the output.

It should look like:
```
Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
```

## Environment Variable

#### Build

|Name|Value|Function|
|-|-|-|
|WHISPER_SYCL|ON (mandatory)|Enable the SYCL code path in the build.<br>WHISPER_SYCL=ON is mandatory for both FP32 and FP16.|
|WHISPER_SYCL_F16|ON (optional)|Enable FP16 in the SYCL code path. Do not set it for FP32.|
|CMAKE_C_COMPILER|icx|Use the icx compiler for the SYCL code path|
|CMAKE_CXX_COMPILER|icpx|Use the icpx compiler for the SYCL code path|
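
For quick reference, these build-time variables map onto the configure step already shown in the build section above; a minimal sketch:

```
# FP32 (WHISPER_SYCL only)
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

# FP16 (additionally set WHISPER_SYCL_F16)
cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
```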

#### Running

|Name|Value|Function|
|-|-|-|
|GGML_SYCL_DEVICE|0 (default) or 1|Set the device ID to use. Check the available device IDs in the default run output|
|GGML_SYCL_DEBUG|0 (default) or 1|Enable the log output guarded by the GGML_SYCL_DEBUG macro|
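
Both run-time variables can be combined on one command line; a sketch reusing the sample invocation from the Run section (GGML_SYCL_DEBUG=1 only turns on the extra logging described above):

```
GGML_SYCL_DEBUG=1 GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
```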

## Known Issue

- Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.

  The oneAPI running environment has not been enabled.

  Install the oneAPI Base Toolkit and enable it with: `source /opt/intel/oneapi/setvars.sh`.

- Hang during startup.

  llama.cpp uses mmap by default to read the model file and copy it to the GPU. On some systems, the memcpy can misbehave and block.

  Solution: add **--no-mmap**, as sketched below.
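
  A sketch of the suggested workaround, appended to the sample command from the Run section; note that `--no-mmap` is carried over from llama.cpp and assumes the binary in use accepts it:

  ```
  # assumes the target binary supports --no-mmap (flag inherited from llama.cpp)
  GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav --no-mmap
  ```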

## Todo

- Support building on Windows.

- Support multiple cards.

examples/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
@@ -79,6 +79,9 @@ else()
     add_subdirectory(talk)
     add_subdirectory(talk-llama)
     add_subdirectory(lsp)
+    if (LLAMA_SYCL)
+        add_subdirectory(sycl)
+    endif()
 endif()

 add_subdirectory(wchess)

examples/sycl/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
# MIT license
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: MIT

set(TARGET ls-sycl-device)
add_executable(${TARGET} ls-sycl-device.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_17)
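
Note: once the tree is configured with `-DWHISPER_SYCL=ON`, this target can be built on its own; a minimal sketch, assuming the default `build` directory used elsewhere in this commit:

```
# from the repository root, after the configure step shown in README_sycl.md
cmake --build build --config Release --target ls-sycl-device
./build/bin/ls-sycl-device
```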

examples/sycl/README.md

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
# llama.cpp/example/sycl

This example program provides tools for llama.cpp for SYCL on Intel GPUs.

## Tool

|Tool Name|Function|Status|
|-|-|-|
|ls-sycl-device|List all SYCL devices with ID, compute capability, max work group size, etc.|Support|

### ls-sycl-device

List all SYCL devices with ID, compute capability, max work group size, etc.

1. Build llama.cpp for SYCL for all targets.

2. Enable the oneAPI running environment:

```
source /opt/intel/oneapi/setvars.sh
```

3. Execute:

```
./build/bin/ls-sycl-device
```

Check the IDs in the startup log, like:

```
found 4 SYCL devices:
Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
    max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
    max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
    max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136

```

|Attribute|Note|
|-|-|
|compute capability 1.3|Level-zero runtime, recommended|
|compute capability 3.0|OpenCL runtime, slower than Level-zero in most cases|
