Commit e377987

Update llama.cpp

1 parent 92b0013 · commit e377987

3 files changed, +16 −16 lines changed

3 files changed

+16
-16
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- (llama.cpp) Fix struct misalignment bug
+
 ## [0.1.64]
 
 ### Added

llama_cpp/llama_cpp.py

Lines changed: 11 additions & 15 deletions
@@ -150,47 +150,43 @@ class llama_token_data_array(Structure):
 
 
 # struct llama_context_params {
+#     int seed;                              // RNG seed, -1 for random
 #     int n_ctx;                             // text context
 #     int n_batch;                           // prompt processing batch size
 #     int n_gpu_layers;                      // number of layers to store in VRAM
 #     int main_gpu;                          // the GPU that is used for scratch and small tensors
 #     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-#     bool low_vram;                         // if true, reduce VRAM usage at the cost of performance
-#     int seed;                              // RNG seed, -1 for random
+#     // called with a progress value between 0 and 1, pass NULL to disable
+#     llama_progress_callback progress_callback;
+#     // context pointer passed to the progress callback
+#     void * progress_callback_user_data;
 
+#     // Keep the booleans together to avoid misalignment during copy-by-value.
+#     bool low_vram;   // if true, reduce VRAM usage at the cost of performance
 #     bool f16_kv;     // use fp16 for KV cache
 #     bool logits_all; // the llama_eval() call computes all logits, not just the last one
 #     bool vocab_only; // only load the vocabulary, no weights
 #     bool use_mmap;   // use mmap if possible
 #     bool use_mlock;  // force system to keep model in RAM
 #     bool embedding;  // embedding mode only
-
-
-#     // called with a progress value between 0 and 1, pass NULL to disable
-#     llama_progress_callback progress_callback;
-#     // context pointer passed to the progress callback
-#     void * progress_callback_user_data;
 # };
 class llama_context_params(Structure):
     _fields_ = [
+        ("seed", c_int),
         ("n_ctx", c_int),
         ("n_batch", c_int),
         ("n_gpu_layers", c_int),
         ("main_gpu", c_int),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
+        ("progress_callback", llama_progress_callback),
+        ("progress_callback_user_data", c_void_p),
         ("low_vram", c_bool),
-        ("seed", c_int),
         ("f16_kv", c_bool),
-        (
-            "logits_all",
-            c_bool,
-        ),
+        ("logits_all", c_bool),
         ("vocab_only", c_bool),
         ("use_mmap", c_bool),
         ("use_mlock", c_bool),
         ("embedding", c_bool),
-        ("progress_callback", llama_progress_callback),
-        ("progress_callback_user_data", c_void_p),
     ]
 
 

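Why the reordering matters: ctypes lays out `Structure` fields strictly in `_fields_` declaration order, computing offsets and padding the same way a C compiler does. Upstream llama.cpp moved `seed` to the front of `llama_context_params` and the progress-callback pair ahead of the booleans, so a Python mirror keeping the old order would read every field after the first mismatch from the wrong byte offset. A minimal sketch of that failure mode, using hypothetical struct names rather than anything from the library:

```python
import ctypes
from ctypes import Structure, c_bool, c_int

# Layout matching the (hypothetical) C header: the int is declared first.
class HeaderLayout(Structure):
    _fields_ = [
        ("seed", c_int),
        ("low_vram", c_bool),
        ("f16_kv", c_bool),
    ]

# A stale Python mirror that still declares the bool before the int.
class StaleLayout(Structure):
    _fields_ = [
        ("low_vram", c_bool),
        ("seed", c_int),
        ("f16_kv", c_bool),
    ]

# ctypes derives offsets and padding from declaration order, so the two
# layouts disagree byte-for-byte on a typical ABI:
print(HeaderLayout.seed.offset, HeaderLayout.low_vram.offset)   # 0 4
print(StaleLayout.seed.offset, StaleLayout.low_vram.offset)     # 4 0
print(ctypes.sizeof(HeaderLayout), ctypes.sizeof(StaleLayout))  # 8 12
```

Passed by value to a function compiled against the header, the stale layout would make the native code read `seed` out of the bool's bytes. Keeping the Python `_fields_` in lockstep with the header, and keeping the booleans grouped as the upstream comment suggests, avoids exactly that.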
vendor/llama.cpp: submodule commit reference updated (1 addition, 1 deletion) to pull in the upstream header change mirrored above.
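For illustration, a one-line submodule bump like this one can be reproduced as follows; the branch name `master` is an assumption, not taken from this commit:

```python
# Hypothetical reproduction of the submodule bump (branch name assumed).
import subprocess

def git(*args: str) -> None:
    subprocess.run(["git", *args], check=True)

git("-C", "vendor/llama.cpp", "fetch", "origin")            # fetch upstream
git("-C", "vendor/llama.cpp", "checkout", "origin/master")  # move the checkout
git("add", "vendor/llama.cpp")      # stages only the new commit hash
git("commit", "-m", "Update llama.cpp")
```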
