Skip to content

Commit 0882fce

Browse files
authored
Merge pull request #833 from SciSharp/july-2024-binaries
July 2024 binaries
2 parents 3a7fd37 + fc6d4dd commit 0882fce

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

70 files changed

+510
-6617
lines changed

.github/workflows/compile.yml

Lines changed: 120 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ jobs:
2727
matrix:
2828
include:
2929
- build: 'noavx'
30-
defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
30+
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
3131
- build: 'avx2'
3232
defines: ''
3333
- build: 'avx'
34-
defines: '-DLLAMA_AVX2=OFF'
34+
defines: '-DGGML_AVX2=OFF'
3535
- build: 'avx512'
36-
defines: '-DLLAMA_AVX512=ON'
36+
defines: '-DGGML_AVX512=ON'
3737
runs-on: ubuntu-20.04
3838
steps:
3939
- uses: actions/checkout@v4
@@ -54,6 +54,11 @@ jobs:
5454
path: ./build/src/libllama.so
5555
name: llama-bin-linux-${{ matrix.build }}-x64.so
5656
if-no-files-found: error
57+
- uses: actions/upload-artifact@v4
58+
with:
59+
path: ./build/ggml/src/libggml.so
60+
name: ggml-bin-linux-${{ matrix.build }}-x64.so
61+
if-no-files-found: error
5762
- name: Upload Llava
5863
uses: actions/upload-artifact@v4
5964
with:
@@ -68,13 +73,13 @@ jobs:
6873
matrix:
6974
include:
7075
- build: 'noavx'
71-
defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
76+
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
7277
- build: 'avx2'
7378
defines: ''
7479
- build: 'avx'
75-
defines: '-DLLAMA_AVX2=OFF'
80+
defines: '-DGGML_AVX2=OFF'
7681
- build: 'avx512'
77-
defines: '-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON'
82+
defines: '-DGGML_AVX512=ON -DGGML_AVX512_VBMI=ON -DGGML_AVX512_VNNI=ON'
7883
runs-on: windows-latest
7984
steps:
8085
- uses: actions/checkout@v4
@@ -90,15 +95,22 @@ jobs:
9095
cd build
9196
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
9297
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
98+
tree /f
9399
94-
- name: Upload artifacts
100+
- name: Upload artifacts (llama)
95101
uses: actions/upload-artifact@v4
96102
with:
97103
path: .\build\bin\Release\llama.dll
98104
name: llama-bin-win-${{ matrix.build }}-x64.dll
99105
if-no-files-found: error
106+
- name: Upload artifacts (ggml)
107+
uses: actions/upload-artifact@v4
108+
with:
109+
path: .\build\bin\Release\ggml.dll
110+
name: ggml-bin-win-${{ matrix.build }}-x64.dll
111+
if-no-files-found: error
100112

101-
- name: Upload Llava
113+
- name: Upload artifacts (llava)
102114
uses: actions/upload-artifact@v4
103115
with:
104116
path: .\build\bin\Release\llava_shared.dll
@@ -147,15 +159,15 @@ jobs:
147159
run: |
148160
mkdir build
149161
cd build
150-
cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_VULKAN=ON
162+
cmake .. ${{ env.COMMON_DEFINE }} -DGGML_VULKAN=ON
151163
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
152164
ls -R
153165
- name: Build
154166
if: ${{ matrix.os == 'ubuntu-22.04' }}
155167
run: |
156168
mkdir build
157169
cd build
158-
cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_VULKAN=ON
170+
cmake .. ${{ env.COMMON_DEFINE }} -DGGML_VULKAN=ON
159171
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
160172
ls -R
161173
- name: Upload llama artifacts (Windows)
@@ -165,6 +177,13 @@ jobs:
165177
path: .\build\bin\Release\llama.dll
166178
name: llama-bin-win-vulkan-x64.dll
167179
if-no-files-found: error
180+
- name: Upload ggml artifacts (Windows)
181+
if: ${{ matrix.os == 'windows-latest' }}
182+
uses: actions/upload-artifact@v4
183+
with:
184+
path: .\build\bin\Release\ggml.dll
185+
name: ggml-bin-win-vulkan-x64.dll
186+
if-no-files-found: error
168187
- name: Upload llava artifacts (Windows)
169188
if: ${{ matrix.os == 'windows-latest' }}
170189
uses: actions/upload-artifact@v4
@@ -179,6 +198,13 @@ jobs:
179198
path: ./build/src/libllama.so
180199
name: llama-bin-linux-vulkan-x64.so
181200
if-no-files-found: error
201+
- name: Upload ggml artifacts (Linux)
202+
if: ${{ matrix.os == 'ubuntu-22.04' }}
203+
uses: actions/upload-artifact@v4
204+
with:
205+
path: ./build/ggml/src/libggml.so
206+
name: ggml-bin-linux-vulkan-x64.so
207+
if-no-files-found: error
182208
- name: Upload llava artifacts (Linux)
183209
if: ${{ matrix.os == 'ubuntu-22.04' }}
184210
uses: actions/upload-artifact@v4
@@ -236,6 +262,13 @@ jobs:
236262
path: .\build\bin\Release\llama.dll
237263
name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
238264
if-no-files-found: error
265+
- name: Upload artifacts (ggml)
266+
if: ${{ matrix.os == 'windows-2019' }}
267+
uses: actions/upload-artifact@v4
268+
with:
269+
path: .\build\bin\Release\ggml.dll
270+
name: ggml-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
271+
if-no-files-found: error
239272
- name: Upload llava artifacts (Windows)
240273
if: ${{ matrix.os == 'windows-2019' }}
241274
uses: actions/upload-artifact@v4
@@ -250,6 +283,13 @@ jobs:
250283
path: ./build/src/libllama.so
251284
name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
252285
if-no-files-found: error
286+
- name: Upload ggml artifacts (Linux)
287+
if: ${{ matrix.os == 'ubuntu-20.04' }}
288+
uses: actions/upload-artifact@v4
289+
with:
290+
path: ./build/ggml/src/libggml.so
291+
name: ggml-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
292+
if-no-files-found: error
253293
- name: Upload llava artifacts (Linux)
254294
if: ${{ matrix.os == 'ubuntu-20.04' }}
255295
uses: actions/upload-artifact@v4
@@ -268,9 +308,9 @@ jobs:
268308
- build: 'arm64'
269309
defines: '-DCMAKE_OSX_ARCHITECTURES=arm64 -DGGML_METAL_EMBED_LIBRARY=ON'
270310
- build: 'x64'
271-
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON'
311+
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=ON -DGGML_AVX2=ON'
272312
- build: 'x64-rosetta2'
273-
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF'
313+
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF'
274314
runs-on: macos-latest
275315
steps:
276316
- uses: actions/checkout@v4
@@ -289,7 +329,13 @@ jobs:
289329
cd build
290330
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
291331
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
292-
- name: Upload artifacts
332+
- name: Upload ggml
333+
uses: actions/upload-artifact@v4
334+
with:
335+
path: ./build/ggml/src/libggml.dylib
336+
name: ggml-bin-osx-${{ matrix.build }}.dylib
337+
if-no-files-found: error
338+
- name: Upload llama
293339
uses: actions/upload-artifact@v4
294340
with:
295341
path: ./build/src/libllama.dylib
@@ -331,54 +377,81 @@ jobs:
331377
# Make all directories at once
332378
mkdir --parents deps/{avx,avx2,avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan}
333379
334-
cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so deps/libllama.so
335-
cp artifacts/llama-bin-linux-avx-x64.so/libllama.so deps/avx/libllama.so
336-
cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
337-
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
338-
339-
cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/llama.dll
340-
cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/llama.dll
341-
cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/llama.dll
342-
cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll
343-
380+
# Linux
381+
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/libggml.so
382+
cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so deps/libllama.so
344383
cp artifacts/llava-bin-linux-noavx-x64.so/libllava_shared.so deps/libllava_shared.so
384+
385+
cp artifacts/ggml-bin-linux-avx-x64.so/libggml.so deps/avx/libggml.so
386+
cp artifacts/llama-bin-linux-avx-x64.so/libllama.so deps/avx/libllama.so
345387
cp artifacts/llava-bin-linux-avx-x64.so/libllava_shared.so deps/avx/libllava_shared.so
388+
389+
cp artifacts/ggml-bin-linux-avx2-x64.so/libggml.so deps/avx2/libggml.so
390+
cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
346391
cp artifacts/llava-bin-linux-avx2-x64.so/libllava_shared.so deps/avx2/libllava_shared.so
392+
393+
cp artifacts/ggml-bin-linux-avx512-x64.so/libggml.so deps/avx512/libggml.so
394+
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
347395
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so
348396
349-
cp artifacts/llava-bin-win-noavx-x64.dll/llava_shared.dll deps/llava_shared.dll
350-
cp artifacts/llava-bin-win-avx-x64.dll/llava_shared.dll deps/avx/llava_shared.dll
351-
cp artifacts/llava-bin-win-avx2-x64.dll/llava_shared.dll deps/avx2/llava_shared.dll
352-
cp artifacts/llava-bin-win-avx512-x64.dll/llava_shared.dll deps/avx512/llava_shared.dll
397+
# Windows
398+
cp artifacts/ggml-bin-win-noavx-x64.dll/ggml.dll deps/ggml.dll
399+
cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/llama.dll
400+
cp artifacts/llava-bin-win-noavx-x64.dll/llava_shared.dll deps/llava_shared.dll
353401
402+
cp artifacts/ggml-bin-win-avx-x64.dll/ggml.dll deps/avx/ggml.dll
403+
cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/llama.dll
404+
cp artifacts/llava-bin-win-avx-x64.dll/llava_shared.dll deps/avx/llava_shared.dll
354405
355-
cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
356-
cp artifacts/llava-bin-osx-arm64.dylib/libllava_shared.dylib deps/osx-arm64/libllava_shared.dylib
357-
cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
406+
cp artifacts/ggml-bin-win-avx2-x64.dll/ggml.dll deps/avx2/ggml.dll
407+
cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/llama.dll
408+
cp artifacts/llava-bin-win-avx2-x64.dll/llava_shared.dll deps/avx2/llava_shared.dll
358409
359-
cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
360-
cp artifacts/llava-bin-osx-x64.dylib/libllava_shared.dylib deps/osx-x64/libllava_shared.dylib
410+
cp artifacts/ggml-bin-win-avx512-x64.dll/ggml.dll deps/avx512/ggml.dll
411+
cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll
412+
cp artifacts/llava-bin-win-avx512-x64.dll/llava_shared.dll deps/avx512/llava_shared.dll
361413
362-
cp artifacts/llama-bin-osx-x64-rosetta2.dylib/libllama.dylib deps/osx-x64-rosetta2/libllama.dylib
414+
# MacOS
415+
cp artifacts/ggml-bin-osx-arm64.dylib/libggml.dylib deps/osx-arm64/libggml.dylib
416+
cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
417+
cp artifacts/llava-bin-osx-arm64.dylib/libllava_shared.dylib deps/osx-arm64/libllava_shared.dylib
418+
cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
419+
420+
cp artifacts/ggml-bin-osx-x64.dylib/libggml.dylib deps/osx-x64/libggml.dylib
421+
cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
422+
cp artifacts/llava-bin-osx-x64.dylib/libllava_shared.dylib deps/osx-x64/libllava_shared.dylib
423+
424+
cp artifacts/ggml-bin-osx-x64-rosetta2.dylib/libggml.dylib deps/osx-x64-rosetta2/libggml.dylib
425+
cp artifacts/llama-bin-osx-x64-rosetta2.dylib/libllama.dylib deps/osx-x64-rosetta2/libllama.dylib
363426
cp artifacts/llava-bin-osx-x64-rosetta2.dylib/libllava_shared.dylib deps/osx-x64-rosetta2/libllava_shared.dylib
364427
365-
cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/llama.dll
366-
cp artifacts/llava-bin-win-cublas-cu11.7.1-x64.dll/llava_shared.dll deps/cu11.7.1/llava_shared.dll
367-
368-
cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
369-
cp artifacts/llava-bin-linux-cublas-cu11.7.1-x64.so/libllava_shared.so deps/cu11.7.1/libllava_shared.so
428+
# Windows CUDA
429+
cp artifacts/ggml-bin-win-cublas-cu11.7.1-x64.dll/ggml.dll deps/cu11.7.1/ggml.dll
430+
cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/llama.dll
431+
cp artifacts/llava-bin-win-cublas-cu11.7.1-x64.dll/llava_shared.dll deps/cu11.7.1/llava_shared.dll
370432
371-
cp artifacts/llama-bin-win-cublas-cu12.2.0-x64.dll/llama.dll deps/cu12.2.0/llama.dll
372-
cp artifacts/llava-bin-win-cublas-cu12.2.0-x64.dll/llava_shared.dll deps/cu12.2.0/llava_shared.dll
433+
cp artifacts/ggml-bin-win-cublas-cu12.2.0-x64.dll/ggml.dll deps/cu12.2.0/ggml.dll
434+
cp artifacts/llama-bin-win-cublas-cu12.2.0-x64.dll/llama.dll deps/cu12.2.0/llama.dll
435+
cp artifacts/llava-bin-win-cublas-cu12.2.0-x64.dll/llava_shared.dll deps/cu12.2.0/llava_shared.dll
436+
437+
# Linux CUDA
438+
cp artifacts/ggml-bin-linux-cublas-cu11.7.1-x64.so/libggml.so deps/cu11.7.1/libggml.so
439+
cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
440+
cp artifacts/llava-bin-linux-cublas-cu11.7.1-x64.so/libllava_shared.so deps/cu11.7.1/libllava_shared.so
373441
374-
cp artifacts/llama-bin-linux-cublas-cu12.2.0-x64.so/libllama.so deps/cu12.2.0/libllama.so
375-
cp artifacts/llava-bin-linux-cublas-cu12.2.0-x64.so/libllava_shared.so deps/cu12.2.0/libllava_shared.so
442+
cp artifacts/ggml-bin-linux-cublas-cu12.2.0-x64.so/libggml.so deps/cu12.2.0/libggml.so
443+
cp artifacts/llama-bin-linux-cublas-cu12.2.0-x64.so/libllama.so deps/cu12.2.0/libllama.so
444+
cp artifacts/llava-bin-linux-cublas-cu12.2.0-x64.so/libllava_shared.so deps/cu12.2.0/libllava_shared.so
376445
377-
cp artifacts/llama-bin-win-vulkan-x64.dll/llama.dll deps/vulkan/llama.dll
378-
cp artifacts/llava-bin-win-vulkan-x64.dll/llava_shared.dll deps/vulkan/llava_shared.dll
446+
# Windows Vulkan
447+
cp artifacts/ggml-bin-win-vulkan-x64.dll/ggml.dll deps/vulkan/ggml.dll
448+
cp artifacts/llama-bin-win-vulkan-x64.dll/llama.dll deps/vulkan/llama.dll
449+
cp artifacts/llava-bin-win-vulkan-x64.dll/llava_shared.dll deps/vulkan/llava_shared.dll
379450
380-
cp artifacts/llama-bin-linux-vulkan-x64.so/libllama.so deps/vulkan/libllama.so
381-
cp artifacts/llava-bin-linux-vulkan-x64.so/libllava_shared.so deps/vulkan/libllava_shared.so
451+
# Linux Vulkan
452+
cp artifacts/ggml-bin-linux-vulkan-x64.so/libggml.so deps/vulkan/libggml.so
453+
cp artifacts/llama-bin-linux-vulkan-x64.so/libllama.so deps/vulkan/libllama.so
454+
cp artifacts/llava-bin-linux-vulkan-x64.so/libllava_shared.so deps/vulkan/libllava_shared.so
382455
383456
- name: Upload artifacts
384457
uses: actions/upload-artifact@v4
@@ -394,3 +467,4 @@ jobs:
394467
llama-*
395468
llava-*
396469
*.metal
470+
ggml-*

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,3 +350,5 @@ site/
350350
/LLama.Benchmark/Models/*.gguf
351351

352352
**/appsettings.Local.json
353+
/LLama/runtimes/deps
354+
/LLama/runtimes/deps.zip

LLama.Unittest/GrammarTest.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using LLama.Common;
1+
using LLama.Common;
22
using LLama.Grammars;
33
using LLama.Native;
44

@@ -86,9 +86,9 @@ public async Task SampleWithTrivialGrammar()
8686
Grammar = grammarInstance2,
8787
};
8888

89-
var result = await executor.InferAsync("Q. 7 + 12\nA. ", inferenceParams).ToListAsync();
89+
var result = string.Join("", await executor.InferAsync("Q. 7 + 12\nA. ", inferenceParams).ToListAsync());
9090

91-
Assert.Equal("cat", result[0]);
91+
Assert.Equal("cat", result);
9292
}
9393

9494
//this test is flakey - it reproduces an error which appears to be a bug in llama.cpp

LLama.Unittest/LLama.Unittest.csproj

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,39 @@
2727
</PackageReference>
2828
</ItemGroup>
2929

30-
<Target Name="DownloadContentFiles" BeforeTargets="Build">
31-
<DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile>
32-
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true"></DownloadFile>
33-
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true"></DownloadFile>
34-
<DownloadFile SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf" DestinationFolder="Models" DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf" SkipUnchangedFiles="true"></DownloadFile>
35-
30+
<Target Name="DownloadContentFilesInner">
31+
32+
<DownloadFile
33+
SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf"
34+
DestinationFolder="Models"
35+
DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf"
36+
SkipUnchangedFiles="true">
37+
</DownloadFile>
38+
39+
<DownloadFile
40+
SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf"
41+
DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf"
42+
SkipUnchangedFiles="true">
43+
</DownloadFile>
44+
45+
<DownloadFile
46+
SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf"
47+
DestinationFolder="Models"
48+
DestinationFileName="mmproj-model-f16.gguf"
49+
SkipUnchangedFiles="true">
50+
</DownloadFile>
3651

52+
<DownloadFile
53+
SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf"
54+
DestinationFolder="Models"
55+
DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf"
56+
SkipUnchangedFiles="true">
57+
</DownloadFile>
58+
59+
</Target>
60+
61+
<Target Name="DownloadContentFiles" BeforeTargets="DispatchToInnerBuilds;BeforeBuild">
62+
<MSBuild Projects="$(MSBuildProjectFile)" Targets="DownloadContentFilesInner" Properties="TargetFramework=once" />
3763
</Target>
3864

3965
<ItemGroup>

LLama.Unittest/TemplateTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,4 +262,4 @@ public void EndOSpeechToken_ReturnsExpected()
262262
{
263263
Assert.Equal("</s>", _model.Tokens.EndOfSpeechToken);
264264
}
265-
}
265+
}

LLama/LLamaContext.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ public uint BatchThreads
7777
/// Get the maximum batch size for this context
7878
/// </summary>
7979
public uint BatchSize => NativeHandle.BatchSize;
80+
81+
/// <summary>
82+
/// Get the special tokens for the model associated with this context
83+
/// </summary>
84+
public SafeLlamaModelHandle.ModelTokens Tokens { get; }
8085

8186
private LLamaTokenData[]? _samplingBuffer;
8287

@@ -99,6 +104,8 @@ public LLamaContext(LLamaWeights model, IContextParams @params, ILogger? logger
99104

100105
@params.ToLlamaContextParams(out var lparams);
101106
NativeHandle = SafeLLamaContextHandle.Create(model.NativeHandle, lparams);
107+
108+
Tokens = model.Tokens;
102109
}
103110

104111
/// <summary>

0 commit comments

Comments
 (0)