Skip to content

Commit 0ae78da

Browse files
authored
Merge pull request #740 from brittlewis12/catchup-upstream
Resolve build errors: update/remove kv cache APIs, disable building tools, latest release & upstream org ref
2 parents 00385ef + ff4784e commit 0ae78da

File tree

4 files changed

+19
-133
lines changed

4 files changed

+19
-133
lines changed

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[submodule "llama-cpp-sys-2/llama.cpp"]
22
path = llama-cpp-sys-2/llama.cpp
3-
url = https://github.com/ggerganov/llama.cpp
3+
url = https://github.com/ggml-org/llama.cpp

llama-cpp-2/src/context/kv_cache.rs

Lines changed: 11 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ impl LlamaContext<'_> {
2828
/// * `dest` - The sequence id to copy the cache to.
2929
/// * `size` - The size of the cache to copy.
3030
pub fn copy_cache(&mut self, src: i32, dest: i32, size: i32) {
31-
unsafe { llama_cpp_sys_2::llama_kv_cache_seq_cp(self.context.as_ptr(), src, dest, 0, size) }
31+
unsafe { llama_cpp_sys_2::llama_kv_self_seq_cp(self.context.as_ptr(), src, dest, 0, size) }
3232
}
3333

3434
/// Copy the cache from one sequence to another.
@@ -58,7 +58,7 @@ impl LlamaContext<'_> {
5858
.map_or(Ok(-1), i32::try_from)
5959
.map_err(KvCacheConversionError::P1TooLarge)?;
6060
unsafe {
61-
llama_cpp_sys_2::llama_kv_cache_seq_cp(self.context.as_ptr(), src, dest, p0, p1);
61+
llama_cpp_sys_2::llama_kv_self_seq_cp(self.context.as_ptr(), src, dest, p0, p1);
6262
}
6363
Ok(())
6464
}
@@ -92,18 +92,18 @@ impl LlamaContext<'_> {
9292
let p1 = p1
9393
.map_or(Ok(-1), i32::try_from)
9494
.map_err(KvCacheConversionError::P1TooLarge)?;
95-
Ok(unsafe { llama_cpp_sys_2::llama_kv_cache_seq_rm(self.context.as_ptr(), src, p0, p1) })
95+
Ok(unsafe { llama_cpp_sys_2::llama_kv_self_seq_rm(self.context.as_ptr(), src, p0, p1) })
9696
}
9797

9898
/// Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
9999
#[must_use]
100100
pub fn get_kv_cache_used_cells(&self) -> i32 {
101-
unsafe { llama_cpp_sys_2::llama_get_kv_cache_used_cells(self.context.as_ptr()) }
101+
unsafe { llama_cpp_sys_2::llama_kv_self_used_cells(self.context.as_ptr()) }
102102
}
103103

104104
/// Clear the KV cache
105105
pub fn clear_kv_cache(&mut self) {
106-
unsafe { llama_cpp_sys_2::llama_kv_cache_clear(self.context.as_ptr()) }
106+
unsafe { llama_cpp_sys_2::llama_kv_self_clear(self.context.as_ptr()) }
107107
}
108108

109109
/// Removes all tokens that do not belong to the specified sequence
@@ -112,7 +112,7 @@ impl LlamaContext<'_> {
112112
///
113113
/// * `seq_id` - The sequence id to keep
114114
pub fn llama_kv_cache_seq_keep(&mut self, seq_id: i32) {
115-
unsafe { llama_cpp_sys_2::llama_kv_cache_seq_keep(self.context.as_ptr(), seq_id) }
115+
unsafe { llama_cpp_sys_2::llama_kv_self_seq_keep(self.context.as_ptr(), seq_id) }
116116
}
117117

118118
#[allow(clippy::doc_markdown)]
@@ -147,7 +147,7 @@ impl LlamaContext<'_> {
147147
.map_or(Ok(-1), i32::try_from)
148148
.map_err(KvCacheConversionError::P1TooLarge)?;
149149
unsafe {
150-
llama_cpp_sys_2::llama_kv_cache_seq_add(self.context.as_ptr(), seq_id, p0, p1, delta);
150+
llama_cpp_sys_2::llama_kv_self_seq_add(self.context.as_ptr(), seq_id, p0, p1, delta);
151151
}
152152
Ok(())
153153
}
@@ -183,7 +183,7 @@ impl LlamaContext<'_> {
183183
.map_or(Ok(-1), i32::try_from)
184184
.map_err(KvCacheConversionError::P1TooLarge)?;
185185
let d = c_int::from(d.get());
186-
unsafe { llama_cpp_sys_2::llama_kv_cache_seq_div(self.context.as_ptr(), seq_id, p0, p1, d) }
186+
unsafe { llama_cpp_sys_2::llama_kv_self_seq_div(self.context.as_ptr(), seq_id, p0, p1, d) }
187187
Ok(())
188188
}
189189

@@ -194,138 +194,19 @@ impl LlamaContext<'_> {
194194
/// * `seq_id` - The sequence id to get the max position for
195195
#[must_use]
196196
pub fn kv_cache_seq_pos_max(&self, seq_id: i32) -> i32 {
197-
unsafe { llama_cpp_sys_2::llama_kv_cache_seq_pos_max(self.context.as_ptr(), seq_id) }
197+
unsafe { llama_cpp_sys_2::llama_kv_self_seq_pos_max(self.context.as_ptr(), seq_id) }
198198
}
199199

200200
/// Defragment the KV cache
201201
/// This will be applied:
202202
/// - lazily on next [`LlamaContext::decode`]
203203
/// - explicitly with [`Self::kv_cache_update`]
204204
pub fn kv_cache_defrag(&mut self) {
205-
unsafe { llama_cpp_sys_2::llama_kv_cache_defrag(self.context.as_ptr()) }
205+
unsafe { llama_cpp_sys_2::llama_kv_self_defrag(self.context.as_ptr()) }
206206
}
207207

208208
/// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
209209
pub fn kv_cache_update(&mut self) {
210-
unsafe { llama_cpp_sys_2::llama_kv_cache_update(self.context.as_ptr()) }
211-
}
212-
213-
/// Returns the number of tokens in the KV cache (slow, use only for debug)
214-
/// If a KV cell has multiple sequences assigned to it, it will be counted multiple times
215-
#[must_use]
216-
pub fn get_kv_cache_token_count(&self) -> i32 {
217-
unsafe { llama_cpp_sys_2::llama_get_kv_cache_token_count(self.context.as_ptr()) }
218-
}
219-
220-
/// Create an empty KV cache view. (use only for debugging purposes)
221-
///
222-
/// # Parameters
223-
///
224-
/// * `n_max_seq` - Maximum number of sequences that can exist in a cell. It's not an error
225-
/// if there are more sequences in a cell than this value, however they will
226-
/// not be visible in the view `cells_sequences`.
227-
#[must_use]
228-
pub fn new_kv_cache_view(&self, n_max_seq: i32) -> KVCacheView {
229-
let view =
230-
unsafe { llama_cpp_sys_2::llama_kv_cache_view_init(self.context.as_ptr(), n_max_seq) };
231-
KVCacheView { view, ctx: self }
232-
}
233-
}
234-
235-
/// Information associated with an individual cell in the KV cache view.
236-
#[derive(Debug)]
237-
pub struct KVCacheViewCell {
238-
/// The position for this cell. Takes KV cache shifts into account.
239-
/// May be negative if the cell is not populated.
240-
pub pos: llama_cpp_sys_2::llama_pos,
241-
}
242-
243-
/// An updateable view of the KV cache. (use only for debugging purposes)
244-
#[derive(Debug)]
245-
pub struct KVCacheView<'a> {
246-
ctx: &'a LlamaContext<'a>,
247-
view: llama_cpp_sys_2::llama_kv_cache_view,
248-
}
249-
250-
impl KVCacheView<'_> {
251-
/// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
252-
pub fn update(&mut self) {
253-
unsafe {
254-
llama_cpp_sys_2::llama_kv_cache_view_update(self.ctx.context.as_ptr(), &mut self.view);
255-
}
256-
}
257-
258-
/// Number of KV cache cells. This will be the same as the context size.
259-
#[must_use]
260-
pub fn n_cells(&self) -> i32 {
261-
self.view.n_cells
262-
}
263-
264-
/// Number of tokens in the cache. For example, if there are two populated
265-
/// cells, the first with 1 sequence id in it and the second with 2 sequence
266-
/// ids then you'll have 3 tokens.
267-
#[must_use]
268-
pub fn token_count(&self) -> i32 {
269-
self.view.token_count
270-
}
271-
272-
/// Number of populated cache cells.
273-
#[must_use]
274-
pub fn used_cells(&self) -> i32 {
275-
self.view.used_cells
276-
}
277-
278-
/// Maximum contiguous empty slots in the cache.
279-
#[must_use]
280-
pub fn max_contiguous(&self) -> i32 {
281-
self.view.max_contiguous
282-
}
283-
284-
/// Index to the start of the `max_contiguous` slot range. Can be negative
285-
/// when cache is full.
286-
#[must_use]
287-
pub fn max_contiguous_idx(&self) -> i32 {
288-
self.view.max_contiguous_idx
289-
}
290-
291-
/// Information for individual cells.
292-
///
293-
/// # Panics
294-
///
295-
/// - if `n_cells` does not fit into usize.
296-
pub fn cells(&self) -> impl Iterator<Item = KVCacheViewCell> {
297-
unsafe {
298-
std::slice::from_raw_parts(
299-
self.view.cells,
300-
usize::try_from(self.view.n_cells).expect("failed to fit n_cells into usize"),
301-
)
302-
}
303-
.iter()
304-
.map(|&cell| KVCacheViewCell { pos: cell.pos })
305-
}
306-
307-
/// The sequences for each cell. There will be `n_max_seq` items per cell.
308-
///
309-
/// # Panics
310-
///
311-
/// - if `n_cells * n_max_seq` does not fit into usize.
312-
/// - if `n_max_seq` does not fit into usize.
313-
pub fn cells_sequences(&self) -> impl Iterator<Item = &[llama_cpp_sys_2::llama_seq_id]> {
314-
unsafe {
315-
std::slice::from_raw_parts(
316-
self.view.cells_sequences,
317-
usize::try_from(self.view.n_cells * self.view.n_seq_max)
318-
.expect("failed to fit n_cells * n_max_seq into usize"),
319-
)
320-
}
321-
.chunks(usize::try_from(self.view.n_seq_max).expect("failed to fit n_max_seq into usize"))
322-
}
323-
}
324-
325-
impl Drop for KVCacheView<'_> {
326-
fn drop(&mut self) {
327-
unsafe {
328-
llama_cpp_sys_2::llama_kv_cache_view_free(&mut self.view);
329-
}
210+
unsafe { llama_cpp_sys_2::llama_kv_self_update(self.context.as_ptr()) }
330211
}
331212
}

llama-cpp-sys-2/build.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ fn main() {
268268
config.define("LLAMA_BUILD_TESTS", "OFF");
269269
config.define("LLAMA_BUILD_EXAMPLES", "OFF");
270270
config.define("LLAMA_BUILD_SERVER", "OFF");
271+
config.define("LLAMA_BUILD_TOOLS", "OFF");
271272
config.define("LLAMA_CURL", "OFF");
272273

273274
config.define(
@@ -279,7 +280,11 @@ fn main() {
279280
config.define("GGML_BLAS", "OFF");
280281
}
281282

282-
if (matches!(target_os, TargetOs::Windows(WindowsVariant::Msvc)) && matches!(profile.as_str(), "Release" | "RelWithDebInfo" | "MinSizeRel"))
283+
if (matches!(target_os, TargetOs::Windows(WindowsVariant::Msvc))
284+
&& matches!(
285+
profile.as_str(),
286+
"Release" | "RelWithDebInfo" | "MinSizeRel"
287+
))
283288
{
284289
// Debug Rust builds under MSVC turn off optimization even though we're ideally building the release profile of llama.cpp.
285290
// Looks like an upstream bug:

llama-cpp-sys-2/llama.cpp

Submodule llama.cpp updated 443 files

0 commit comments

Comments (0)