Skip to content

Commit 5cbc43e

Browse files
NeoZhangJianyu authored and
hodlen committed
fix set main gpu error (ggml-org#6073)
1 parent c44ebd1 commit 5cbc43e

File tree

5 files changed

+272
-89
lines changed

5 files changed

+272
-89
lines changed

examples/sycl/build.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@ source /opt/intel/oneapi/setvars.sh
1313
#for FP32
1414
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
1515

16-
#build example/main only
16+
#build example/main
1717
#cmake --build . --config Release --target main
1818

19+
#build example/llama-bench
20+
#cmake --build . --config Release --target llama-bench
21+
1922
#build all binary
2023
cmake --build . --config Release -v

examples/sycl/run-llama2.sh

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,28 @@ source /opt/intel/oneapi/setvars.sh
99

1010
if [ $# -gt 0 ]; then
1111
GGML_SYCL_DEVICE=$1
12+
GGML_SYCL_SINGLE_GPU=1
1213
else
1314
GGML_SYCL_DEVICE=0
1415
fi
15-
echo "use $GGML_SYCL_DEVICE as main GPU"
16+
1617
#export GGML_SYCL_DEBUG=1
1718

1819

1920
#ZES_ENABLE_SYSMAN=1 enables querying the GPU's free memory via sycl::aspect::ext_intel_free_memory. Recommended when --split-mode = layer.
2021

21-
#use all GPUs with same max compute units
22-
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
22+
if [ $GGML_SYCL_SINGLE_GPU -eq 1 ]; then
23+
echo "use $GGML_SYCL_DEVICE as main GPU"
24+
#use single GPU only
25+
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
26+
else
27+
#use multiple GPUs with same max compute units
28+
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
29+
fi
2330

2431
#use main GPU only
2532
#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
2633

34+
#use multiple GPUs with same max compute units
35+
#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
36+

0 commit comments

Comments
 (0)