
Commit 3af167d

Merge tag 'v0.2.16' into main

2 parents: cc0fe43 + b7e60b6

File tree: 6 files changed (+40 −21 lines)


.github/workflows/build-and-release.yaml

Lines changed: 3 additions & 0 deletions

@@ -33,6 +33,9 @@ jobs:
 
       - name: Build wheels
         run: python -m cibuildwheel --output-dir wheelhouse
+        env:
+          # disable repair
+          CIBW_REPAIR_WHEEL_COMMAND: ""
 
       - uses: actions/upload-artifact@v3
         with:
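For context, setting `CIBW_REPAIR_WHEEL_COMMAND` to an empty string tells cibuildwheel to skip its wheel-repair (auditwheel/delocate) phase. A minimal sketch of the same invocation driven from Python, assuming cibuildwheel is installed locally; the subprocess wrapper is illustrative and not part of this commit:

```python
# Illustrative only: reproduce the workflow step locally by clearing
# CIBW_REPAIR_WHEEL_COMMAND so cibuildwheel skips its repair step.
import os
import subprocess

env = dict(os.environ, CIBW_REPAIR_WHEEL_COMMAND="")  # empty command disables repair
subprocess.run(
    ["python", "-m", "cibuildwheel", "--output-dir", "wheelhouse"],
    env=env,
    check=True,
)
```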

CHANGELOG.md

Lines changed: 13 additions & 0 deletions

@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.16]
+
+- Update llama.cpp to ggerganov/llama.cpp@a75fa576abba9d37f463580c379e4bbf1e1ad03c
+- Add `set_seed` to `Llama` class by @abetlen in fd41ed3a908761d286102a019a34c2938a15118d
+- Fix server doc arguments by @kjunggithub in #892
+- Fix response_format handler in llava chat handler by @abetlen in b62c44983921197ed10a7d29dc4ba920e9979380
+- Fix default max_tokens: chat completion is now unlimited (up to context length) and completion is 16 tokens to match OpenAI defaults by @abetlen in e7962d2c733cbbeec5a37392c81f64185a9a39e8
+- Fix json_schema_to_gbnf helper so that it takes a json schema string as input instead by @abetlen in faeae181b1e868643c0dc28fcf039f077baf0829
+- Add support for $ref and $def in json_schema_to_gbnf to handle more complex function schemas by @abetlen in 770df344369c0630df1be14be9f9e301e7c56d24
+- Update functionary chat handler for new OpenAI api by @abetlen in 1b376c62b775b401653facf25a519d116aafe99a
+- Fix add default stop sequence to chatml chat format by @abetlen in b84d76a844149216d511cfd8cdb9827148a1853c
+- Fix sampling bug when logits_all=False by @abetlen in 6f0b0b1b840af846938ed74d0e8170a91c40e617
+
 ## [0.2.15]
 
 - Update llama.cpp to ggerganov/llama.cpp@0a7c980b6f94a049cb804573df2d8092a34df8e4
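A quick sketch of the two behavioral changes called out above: the new `Llama.set_seed` method and the OpenAI-style `max_tokens` defaults. The model path is a placeholder and outputs depend on the model, so treat this as an assumption-laden usage example rather than project documentation:

```python
from llama_cpp import Llama

# Placeholder model path; any GGUF model works here.
llama = Llama(model_path="./models/example.gguf")

# New in 0.2.16: reseed the sampler for reproducible generations.
llama.set_seed(1337)

# Per the changelog, plain completions now default to max_tokens=16 (OpenAI-style) ...
out = llama.create_completion("The quick brown fox")

# ... while chat completions are unbounded up to the model's context length.
chat = llama.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello."}]
)
```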

llama_cpp/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.15"
+__version__ = "0.2.16"

llama_cpp/llama.py

Lines changed: 6 additions & 7 deletions

@@ -1019,27 +1019,26 @@ def eval(self, tokens: Sequence[int]):
         """
         assert self._ctx.ctx is not None
         assert self._batch.batch is not None
-        n_ctx = self._n_ctx
+        self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
         for i in range(0, len(tokens), self.n_batch):
             batch = tokens[i : min(len(tokens), i + self.n_batch)]
-            n_past = min(n_ctx - len(batch), self.n_tokens)
+            n_past = self.n_tokens
             n_tokens = len(batch)
-            self._ctx.kv_cache_seq_rm(-1, n_past, -1)
             self._batch.set_batch(
                 batch=batch, n_past=n_past, logits_all=self.context_params.logits_all
             )
             self._ctx.decode(self._batch)
             # Save tokens
-            self.input_ids[self.n_tokens : self.n_tokens + n_tokens] = batch
+            self.input_ids[n_past : n_past + n_tokens] = batch
             # Save logits
-            rows = n_tokens if self.context_params.logits_all else 1
+            rows = n_tokens
             cols = self._n_vocab
             offset = (
                 0 if self.context_params.logits_all else n_tokens - 1
             )  # NOTE: Only save the last token logits if logits_all is False
-            self.scores[self.n_tokens + offset : self.n_tokens + n_tokens, :].reshape(
+            self.scores[n_past + offset : n_past + n_tokens, :].reshape(
                 -1
-            )[:] = self._ctx.get_logits()[: rows * cols]
+            )[:] = self._ctx.get_logits()[offset * cols : rows * cols]
             # Update n_tokens
             self.n_tokens += n_tokens
 
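The last part of this hunk is the `logits_all=False` sampling fix mentioned in the changelog: in that mode llama.cpp only fills the final row of the per-batch logits buffer, so the copy into `self.scores` has to start at `offset * cols` rather than 0. A small self-contained sketch of that indexing with toy sizes (NumPy stands in for the library's internal buffers; this is not the project's code):

```python
import numpy as np

n_tokens, cols = 4, 8            # toy batch size and vocab size
logits_all = False

rows = n_tokens
offset = 0 if logits_all else n_tokens - 1   # only the last token has logits

# Pretend this is the flat buffer returned by get_logits(): only the last
# row (the final token's logits) is populated when logits_all is False.
raw = np.zeros(rows * cols, dtype=np.float32)
raw[offset * cols : rows * cols] = 1.0

scores = np.zeros((n_tokens, cols), dtype=np.float32)
# The fixed slice: start the copy at offset * cols so the last row of
# scores receives the last row of the buffer, not its (empty) first row.
scores[offset:n_tokens, :].reshape(-1)[:] = raw[offset * cols : rows * cols]

assert (scores[-1] == 1.0).all() and (scores[:-1] == 0.0).all()
```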

tests/test_llama.py

Lines changed: 16 additions & 12 deletions

@@ -1,4 +1,7 @@
+import ctypes
+
 import pytest
+
 import llama_cpp
 
 MODEL = "./vendor/llama.cpp/models/ggml-vocab-llama.gguf"

@@ -36,19 +39,20 @@ def test_llama_cpp_tokenization():
 
 
 def test_llama_patch(monkeypatch):
-    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
+    n_ctx = 128
+    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx)
     n_vocab = llama_cpp.llama_n_vocab(llama._model.model)
+    assert n_vocab == 32000
 
     ## Set up mock function
-    def mock_eval(*args, **kwargs):
+    def mock_decode(*args, **kwargs):
         return 0
 
     def mock_get_logits(*args, **kwargs):
-        return (llama_cpp.c_float * n_vocab)(
-            *[llama_cpp.c_float(0) for _ in range(n_vocab)]
-        )
+        size = n_vocab * n_ctx
+        return (llama_cpp.c_float * size)()
 
-    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_eval)
+    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
 
     output_text = " jumps over the lazy dog."

@@ -126,19 +130,19 @@ def test_llama_pickle():
 
 
 def test_utf8(monkeypatch):
-    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
+    n_ctx = 512
+    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx, logits_all=True)
     n_vocab = llama.n_vocab()
 
     ## Set up mock function
-    def mock_eval(*args, **kwargs):
+    def mock_decode(*args, **kwargs):
         return 0
 
     def mock_get_logits(*args, **kwargs):
-        return (llama_cpp.c_float * n_vocab)(
-            *[llama_cpp.c_float(0) for _ in range(n_vocab)]
-        )
+        size = n_vocab * n_ctx
+        return (llama_cpp.c_float * size)()
 
-    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_eval)
+    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
 
     output_text = "😀"
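The updated mocks hand back a zero-filled ctypes buffer sized `n_vocab * n_ctx`, matching the larger logits region the patched `eval` may read from. A standalone illustration of what that mock allocates (the sizes are the test's; the rest is plain ctypes behavior):

```python
import ctypes

n_vocab, n_ctx = 32000, 128
size = n_vocab * n_ctx

# (ctypes.c_float * size)() allocates a zero-initialized float array, so the
# mocked llama_get_logits returns all-zero logits for every token position.
logits = (ctypes.c_float * size)()
assert len(logits) == size
assert logits[0] == 0.0 and logits[size - 1] == 0.0
```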

vendor/llama.cpp (submodule pointer update; no line diff shown)
