Skip to content

Commit 927afdd

Browse files
authored
Merge branch 'master' into add_stop_token
2 parents f7229f2 + b608b55 commit 927afdd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+8100
-2304
lines changed

.devops/tools.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then
2323
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
2424
else
2525
echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
26-
./quantize "$i" "${i/f16/q4_0}" 2
26+
./quantize "$i" "${i/f16/q4_0}" q4_0
2727
fi
2828
done
2929
else

.github/workflows/build.yml

Lines changed: 148 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,15 @@ on:
1212
- master
1313
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
1414
pull_request:
15-
types: [opened, synchronize, edited, reopened, review_requested, ready_for_review]
15+
types: [opened, synchronize, reopened]
1616
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp']
1717

1818
env:
1919
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
2020

2121
jobs:
22-
ubuntu-latest-make:
23-
if: github.event.pull_request.draft == false
24-
25-
runs-on: ubuntu-latest
22+
ubuntu-focal-make:
23+
runs-on: ubuntu-20.04
2624

2725
steps:
2826
- name: Clone
@@ -33,16 +31,14 @@ jobs:
3331
id: depends
3432
run: |
3533
sudo apt-get update
36-
sudo apt-get install build-essential
34+
sudo apt-get install build-essential gcc-8
3735
3836
- name: Build
3937
id: make_build
4038
run: |
41-
make
39+
CC=gcc-8 make
4240
4341
ubuntu-latest-cmake:
44-
if: github.event.pull_request.draft == false
45-
4642
runs-on: ubuntu-latest
4743

4844
steps:
@@ -71,8 +67,6 @@ jobs:
7167
ctest --verbose
7268
7369
ubuntu-latest-cmake-sanitizer:
74-
if: github.event.pull_request.draft == false
75-
7670
runs-on: ubuntu-latest
7771

7872
continue-on-error: true
@@ -108,8 +102,6 @@ jobs:
108102
ctest --verbose
109103
110104
macOS-latest-make:
111-
if: github.event.pull_request.draft == false
112-
113105
runs-on: macos-latest
114106

115107
steps:
@@ -128,9 +120,7 @@ jobs:
128120
make
129121
130122
macOS-latest-cmake:
131-
if: github.event.pull_request.draft == false
132-
133-
runs-on: macOS-latest
123+
runs-on: macos-latest
134124

135125
steps:
136126
- name: Clone
@@ -157,32 +147,87 @@ jobs:
157147
ctest --verbose
158148
159149
windows-latest-cmake:
160-
if: github.event.pull_request.draft == false
161-
162150
runs-on: windows-latest
151+
env:
152+
OPENBLAS_VERSION: 0.3.23
153+
OPENCL_VERSION: 2023.04.17
154+
CLBLAST_VERSION: 1.5.3
163155

164156
strategy:
165157
matrix:
166158
include:
167-
- build: 'avx2'
168-
defines: ''
169-
- build: 'avx'
170-
defines: '-DLLAMA_AVX2=OFF'
171-
- build: 'avx512'
172-
defines: '-DLLAMA_AVX512=ON'
159+
- build: 'avx2'
160+
defines: ''
161+
- build: 'avx'
162+
defines: '-DLLAMA_AVX2=OFF'
163+
- build: 'avx512'
164+
defines: '-DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
165+
- build: 'clblast'
166+
defines: '-DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
167+
- build: 'openblas'
168+
defines: '-DLLAMA_OPENBLAS=ON -DBLAS_LIBRARIES="/LIBPATH:$env:RUNNER_TEMP/openblas/lib" -DOPENBLAS_INC="$env:RUNNER_TEMP/openblas/include"'
173169

174170
steps:
175171
- name: Clone
176172
id: checkout
177173
uses: actions/checkout@v1
178174

175+
- name: Download OpenCL SDK
176+
id: get_opencl
177+
if: ${{ matrix.build == 'clblast' }}
178+
run: |
179+
curl.exe -o $env:RUNNER_TEMP/opencl.zip -L "https://github.com/KhronosGroup/OpenCL-SDK/releases/download/v${env:OPENCL_VERSION}/OpenCL-SDK-v${env:OPENCL_VERSION}-Win-x64.zip"
180+
mkdir $env:RUNNER_TEMP/opencl
181+
tar.exe -xvf $env:RUNNER_TEMP/opencl.zip --strip-components=1 -C $env:RUNNER_TEMP/opencl
182+
183+
- name: Download CLBlast
184+
id: get_clblast
185+
if: ${{ matrix.build == 'clblast' }}
186+
run: |
187+
curl.exe -o $env:RUNNER_TEMP/clblast.zip -L "https://github.com/CNugteren/CLBlast/releases/download/${env:CLBLAST_VERSION}/CLBlast-${env:CLBLAST_VERSION}-Windows-x64.zip"
188+
curl.exe -o $env:RUNNER_TEMP/CLBlast.LICENSE.txt -L "https://github.com/CNugteren/CLBlast/raw/${env:CLBLAST_VERSION}/LICENSE"
189+
mkdir $env:RUNNER_TEMP/clblast
190+
tar.exe -xvf $env:RUNNER_TEMP/clblast.zip -C $env:RUNNER_TEMP/clblast
191+
foreach ($f in (gci -Recurse -Path "$env:RUNNER_TEMP/clblast" -Filter '*.cmake')) {
192+
$txt = Get-Content -Path $f -Raw
193+
$txt.Replace('C:/dependencies/opencl/', "$($env:RUNNER_TEMP.Replace('\','/'))/opencl/") | Set-Content -Path $f -Encoding UTF8
194+
}
195+
196+
- name: Download OpenBLAS
197+
id: get_openblas
198+
if: ${{ matrix.build == 'openblas' }}
199+
run: |
200+
curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
201+
curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
202+
mkdir $env:RUNNER_TEMP/openblas
203+
tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
204+
$vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
205+
$msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
206+
$lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
207+
& $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
208+
179209
- name: Build
180210
id: cmake_build
181211
run: |
182212
mkdir build
183213
cd build
184214
cmake .. ${{ matrix.defines }}
185215
cmake --build . --config Release
216+
cp ../LICENSE ./bin/Release/llama.cpp.txt
217+
218+
- name: Add clblast.dll
219+
id: add_clblast_dll
220+
if: ${{ matrix.build == 'clblast' }}
221+
run: |
222+
cp $env:RUNNER_TEMP/clblast/lib/clblast.dll ./build/bin/Release
223+
cp $env:RUNNER_TEMP/CLBlast.LICENSE.txt ./build/bin/Release/CLBlast-${env:CLBLAST_VERSION}.txt
224+
225+
- name: Add libopenblas.dll
226+
id: add_libopenblas_dll
227+
if: ${{ matrix.build == 'openblas' }}
228+
run: |
229+
cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
230+
cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
186231
187232
- name: Check AVX512F support
188233
id: check_avx512f
@@ -199,7 +244,7 @@ jobs:
199244
200245
- name: Test
201246
id: cmake_test
202-
if: ${{ matrix.build != 'avx512' || env.HAS_AVX512F == '1' }} # Test AVX-512 only when possible
247+
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
203248
run: |
204249
cd build
205250
ctest -C Release --verbose
@@ -222,17 +267,94 @@ jobs:
222267
path: |
223268
llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip
224269
270+
windows-latest-cmake-cublas:
271+
runs-on: windows-latest
272+
273+
strategy:
274+
matrix:
275+
cuda: ['12.1.0', '11.7.1']
276+
build: ['cublas']
277+
278+
steps:
279+
- name: Clone
280+
id: checkout
281+
uses: actions/checkout@v1
282+
283+
- uses: Jimver/cuda-toolkit@v0.2.10
284+
id: cuda-toolkit
285+
with:
286+
cuda: ${{ matrix.cuda }}
287+
# TODO(green-sky): _dev seems to fail, and non-dev are not enough
288+
#sub-packages: '["nvcc", "cudart", "cublas", "cudart_dev", "cublas_dev"]'
289+
290+
- name: Build
291+
id: cmake_build
292+
run: |
293+
mkdir build
294+
cd build
295+
cmake .. -DLLAMA_CUBLAS=ON
296+
cmake --build . --config Release
297+
298+
- name: Get commit hash
299+
id: commit
300+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
301+
uses: pr-mpt/actions-commit-hash@v2
302+
303+
- name: Pack artifacts
304+
id: pack_artifacts
305+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
306+
run: |
307+
7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
308+
309+
- name: Upload artifacts
310+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
311+
uses: actions/upload-artifact@v3
312+
with:
313+
path: |
314+
llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
315+
316+
- name: Copy and pack Cuda runtime
317+
if: ${{ matrix.cuda == '12.1.0' }}
318+
# TODO(green-sky): paths are cuda 12 specific
319+
run: |
320+
echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
321+
mkdir '.\build\bin\cudart\'
322+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_12.dll" '.\build\bin\cudart\'
323+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_12.dll" '.\build\bin\cudart\'
324+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_12.dll" '.\build\bin\cudart\'
325+
7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\*
326+
327+
- name: Copy and pack Cuda runtime
328+
if: ${{ matrix.cuda == '11.7.1' }}
329+
# TODO(green-sky): paths are cuda 11 specific
330+
run: |
331+
echo "Cuda install location: ${{steps.cuda-toolkit.outputs.CUDA_PATH}}"
332+
mkdir '.\build\bin\cudart\'
333+
ls "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin"
334+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cudart64_110.dll" '.\build\bin\cudart\'
335+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublas64_11.dll" '.\build\bin\cudart\'
336+
cp "${{steps.cuda-toolkit.outputs.CUDA_PATH}}\bin\cublasLt64_11.dll" '.\build\bin\cudart\'
337+
7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip .\build\bin\cudart\*
338+
339+
- name: Upload Cuda runtime
340+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
341+
uses: actions/upload-artifact@v3
342+
with:
343+
path: |
344+
cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
345+
225346
release:
226347
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
227348

228349
runs-on: ubuntu-latest
229350

230351
needs:
231-
- ubuntu-latest-make
352+
- ubuntu-focal-make
232353
- ubuntu-latest-cmake
233354
- macOS-latest-make
234355
- macOS-latest-cmake
235356
- windows-latest-cmake
357+
- windows-latest-cmake-cublas
236358

237359
steps:
238360
- name: Download artifacts

.gitignore

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,25 @@ build-em/
1515
build-debug/
1616
build-release/
1717
build-static/
18+
build-cublas/
1819
build-no-accel/
1920
build-sanitize-addr/
2021
build-sanitize-thread/
2122

2223
models/*
24+
*.bin
2325

2426
/main
2527
/quantize
2628
/quantize-stats
2729
/result
2830
/perplexity
2931
/embedding
30-
/benchmark-q4_0-matmult
32+
/benchmark-matmult
3133
/vdot
3234
/Pipfile
3335

36+
build-info.h
3437
arm_neon.h
3538
compile_commands.json
3639

@@ -40,3 +43,6 @@ zig-out/
4043
zig-cache/
4144

4245
ppl-*.txt
46+
qnt-*.txt
47+
48+
examples/jeopardy/results.txt

0 commit comments

Comments
 (0)