@@ -28,7 +28,7 @@ impl LlamaContext<'_> {
28
28
/// * `dest` - The sequence id to copy the cache to.
29
29
/// * `size` - The size of the cache to copy.
30
30
// Forwards `src` and `dest` to the FFI sequence-copy call, passing `0` and `size` in the
// two trailing slots (the slots the ranged copy further down fills with `p0` and `p1`).
// This hunk only renames the FFI symbol, `llama_kv_cache_seq_cp` -> `llama_kv_self_seq_cp`;
// the wrapper's signature and the argument list are unchanged.
pub fn copy_cache ( & mut self , src : i32 , dest : i32 , size : i32 ) {
31
- unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_cp ( self . context . as_ptr ( ) , src, dest, 0 , size) }
31
+ unsafe { llama_cpp_sys_2:: llama_kv_self_seq_cp ( self . context . as_ptr ( ) , src, dest, 0 , size) }
32
32
}
33
33
34
34
/// Copy the cache from one sequence to another.
@@ -58,7 +58,7 @@ impl LlamaContext<'_> {
58
58
. map_or ( Ok ( -1 ) , i32:: try_from)
59
59
. map_err ( KvCacheConversionError :: P1TooLarge ) ?;
60
60
unsafe {
61
- llama_cpp_sys_2:: llama_kv_cache_seq_cp ( self . context . as_ptr ( ) , src, dest, p0, p1) ;
61
+ llama_cpp_sys_2:: llama_kv_self_seq_cp ( self . context . as_ptr ( ) , src, dest, p0, p1) ;
62
62
}
63
63
Ok ( ( ) )
64
64
}
@@ -92,18 +92,18 @@ impl LlamaContext<'_> {
92
92
let p1 = p1
93
93
. map_or ( Ok ( -1 ) , i32:: try_from)
94
94
. map_err ( KvCacheConversionError :: P1TooLarge ) ?;
95
- Ok ( unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_rm ( self . context . as_ptr ( ) , src, p0, p1) } )
95
+ Ok ( unsafe { llama_cpp_sys_2:: llama_kv_self_seq_rm ( self . context . as_ptr ( ) , src, p0, p1) } )
96
96
}
97
97
98
98
/// Returns the number of used KV cells (i.e. cells that have at least one sequence assigned to them).
99
99
#[ must_use]
100
100
// The count is returned exactly as reported by the FFI call; no conversion is applied.
// This hunk only renames the FFI symbol, `llama_get_kv_cache_used_cells` ->
// `llama_kv_self_used_cells`; the wrapper keeps its public name.
pub fn get_kv_cache_used_cells ( & self ) -> i32 {
101
- unsafe { llama_cpp_sys_2:: llama_get_kv_cache_used_cells ( self . context . as_ptr ( ) ) }
101
+ unsafe { llama_cpp_sys_2:: llama_kv_self_used_cells ( self . context . as_ptr ( ) ) }
102
102
}
103
103
104
104
/// Clear the KV cache.
105
105
// Takes no arguments beyond the context pointer and returns nothing to the caller.
// This hunk only renames the FFI symbol, `llama_kv_cache_clear` -> `llama_kv_self_clear`.
pub fn clear_kv_cache ( & mut self ) {
106
- unsafe { llama_cpp_sys_2:: llama_kv_cache_clear ( self . context . as_ptr ( ) ) }
106
+ unsafe { llama_cpp_sys_2:: llama_kv_self_clear ( self . context . as_ptr ( ) ) }
107
107
}
108
108
109
109
/// Removes all tokens that do not belong to the specified sequence
@@ -112,7 +112,7 @@ impl LlamaContext<'_> {
112
112
///
113
113
/// * `seq_id` - The sequence id to keep
114
114
// Forwards `seq_id` unchanged to the FFI "keep" call for this context.
// This hunk only renames the FFI symbol, `llama_kv_cache_seq_keep` -> `llama_kv_self_seq_keep`.
// The Rust wrapper deliberately keeps its old public name, so existing callers still compile.
pub fn llama_kv_cache_seq_keep ( & mut self , seq_id : i32 ) {
115
- unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_keep ( self . context . as_ptr ( ) , seq_id) }
115
+ unsafe { llama_cpp_sys_2:: llama_kv_self_seq_keep ( self . context . as_ptr ( ) , seq_id) }
116
116
}
117
117
118
118
#[ allow( clippy:: doc_markdown) ]
@@ -147,7 +147,7 @@ impl LlamaContext<'_> {
147
147
. map_or ( Ok ( -1 ) , i32:: try_from)
148
148
. map_err ( KvCacheConversionError :: P1TooLarge ) ?;
149
149
unsafe {
150
- llama_cpp_sys_2:: llama_kv_cache_seq_add ( self . context . as_ptr ( ) , seq_id, p0, p1, delta) ;
150
+ llama_cpp_sys_2:: llama_kv_self_seq_add ( self . context . as_ptr ( ) , seq_id, p0, p1, delta) ;
151
151
}
152
152
Ok ( ( ) )
153
153
}
@@ -183,7 +183,7 @@ impl LlamaContext<'_> {
183
183
. map_or ( Ok ( -1 ) , i32:: try_from)
184
184
. map_err ( KvCacheConversionError :: P1TooLarge ) ?;
185
185
let d = c_int:: from ( d. get ( ) ) ;
186
- unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_div ( self . context . as_ptr ( ) , seq_id, p0, p1, d) }
186
+ unsafe { llama_cpp_sys_2:: llama_kv_self_seq_div ( self . context . as_ptr ( ) , seq_id, p0, p1, d) }
187
187
Ok ( ( ) )
188
188
}
189
189
@@ -194,138 +194,19 @@ impl LlamaContext<'_> {
194
194
/// * `seq_id` - The sequence id to get the max position for
195
195
#[ must_use]
196
196
// Returns the FFI call's result for `seq_id` as-is (an `i32`); no range check or
// conversion is performed in this wrapper.
// This hunk only renames the FFI symbol, `llama_kv_cache_seq_pos_max` ->
// `llama_kv_self_seq_pos_max`.
pub fn kv_cache_seq_pos_max ( & self , seq_id : i32 ) -> i32 {
197
- unsafe { llama_cpp_sys_2:: llama_kv_cache_seq_pos_max ( self . context . as_ptr ( ) , seq_id) }
197
+ unsafe { llama_cpp_sys_2:: llama_kv_self_seq_pos_max ( self . context . as_ptr ( ) , seq_id) }
198
198
}
199
199
200
200
/// Defragment the KV cache.
201
201
/// The defragmentation will be applied:
202
202
/// - lazily on the next [`LlamaContext::decode`]
203
203
/// - explicitly with [`Self::kv_cache_update`]
204
204
// This hunk only renames the FFI symbol, `llama_kv_cache_defrag` -> `llama_kv_self_defrag`;
// the wrapper still passes nothing but the context pointer.
pub fn kv_cache_defrag ( & mut self ) {
205
- unsafe { llama_cpp_sys_2:: llama_kv_cache_defrag ( self . context . as_ptr ( ) ) }
205
+ unsafe { llama_cpp_sys_2:: llama_kv_self_defrag ( self . context . as_ptr ( ) ) }
206
206
}
207
207
208
208
/// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
209
209
pub fn kv_cache_update ( & mut self ) {
210
- unsafe { llama_cpp_sys_2:: llama_kv_cache_update ( self . context . as_ptr ( ) ) }
211
- }
212
-
213
- /// Returns the number of tokens in the KV cache (slow, use only for debug)
214
- /// If a KV cell has multiple sequences assigned to it, it will be counted multiple times
215
- #[ must_use]
216
- pub fn get_kv_cache_token_count ( & self ) -> i32 {
217
- unsafe { llama_cpp_sys_2:: llama_get_kv_cache_token_count ( self . context . as_ptr ( ) ) }
218
- }
219
-
220
- /// Create an empty KV cache view. (use only for debugging purposes)
221
- ///
222
- /// # Parameters
223
- ///
224
- /// * `n_max_seq` - Maximum number of sequences that can exist in a cell. It's not an error
225
- /// if there are more sequences in a cell than this value, however they will
226
- /// not be visible in the view `cells_sequences`.
227
- #[ must_use]
228
- pub fn new_kv_cache_view ( & self , n_max_seq : i32 ) -> KVCacheView {
229
- let view =
230
- unsafe { llama_cpp_sys_2:: llama_kv_cache_view_init ( self . context . as_ptr ( ) , n_max_seq) } ;
231
- KVCacheView { view, ctx : self }
232
- }
233
- }
234
-
235
- /// Information associated with an individual cell in the KV cache view.
236
- #[ derive( Debug ) ]
237
- pub struct KVCacheViewCell {
238
- /// The position for this cell. Takes KV cache shifts into account.
239
- /// May be negative if the cell is not populated.
240
- pub pos : llama_cpp_sys_2:: llama_pos ,
241
- }
242
-
243
- /// An updateable view of the KV cache. (use only for debugging purposes)
244
- #[ derive( Debug ) ]
245
- pub struct KVCacheView < ' a > {
246
- ctx : & ' a LlamaContext < ' a > ,
247
- view : llama_cpp_sys_2:: llama_kv_cache_view ,
248
- }
249
-
250
- impl KVCacheView < ' _ > {
251
- /// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
252
- pub fn update ( & mut self ) {
253
- unsafe {
254
- llama_cpp_sys_2:: llama_kv_cache_view_update ( self . ctx . context . as_ptr ( ) , & mut self . view ) ;
255
- }
256
- }
257
-
258
- /// Number of KV cache cells. This will be the same as the context size.
259
- #[ must_use]
260
- pub fn n_cells ( & self ) -> i32 {
261
- self . view . n_cells
262
- }
263
-
264
- /// Number of tokens in the cache. For example, if there are two populated
265
- /// cells, the first with 1 sequence id in it and the second with 2 sequence
266
- /// ids then you'll have 3 tokens.
267
- #[ must_use]
268
- pub fn token_count ( & self ) -> i32 {
269
- self . view . token_count
270
- }
271
-
272
- /// Number of populated cache cells.
273
- #[ must_use]
274
- pub fn used_cells ( & self ) -> i32 {
275
- self . view . used_cells
276
- }
277
-
278
- /// Maximum contiguous empty slots in the cache.
279
- #[ must_use]
280
- pub fn max_contiguous ( & self ) -> i32 {
281
- self . view . max_contiguous
282
- }
283
-
284
- /// Index to the start of the `max_contiguous` slot range. Can be negative
285
- /// when cache is full.
286
- #[ must_use]
287
- pub fn max_contiguous_idx ( & self ) -> i32 {
288
- self . view . max_contiguous_idx
289
- }
290
-
291
- /// Information for individual cells.
292
- ///
293
- /// # Panics
294
- ///
295
- /// - if `n_cells` does not fit into usize.
296
- pub fn cells ( & self ) -> impl Iterator < Item = KVCacheViewCell > {
297
- unsafe {
298
- std:: slice:: from_raw_parts (
299
- self . view . cells ,
300
- usize:: try_from ( self . view . n_cells ) . expect ( "failed to fit n_cells into usize" ) ,
301
- )
302
- }
303
- . iter ( )
304
- . map ( |& cell| KVCacheViewCell { pos : cell. pos } )
305
- }
306
-
307
- /// The sequences for each cell. There will be `n_max_seq` items per cell.
308
- ///
309
- /// # Panics
310
- ///
311
- /// - if `n_cells * n_max_seq` does not fit into usize.
312
- /// - if `n_max_seq` does not fit into usize.
313
- pub fn cells_sequences ( & self ) -> impl Iterator < Item = & [ llama_cpp_sys_2:: llama_seq_id ] > {
314
- unsafe {
315
- std:: slice:: from_raw_parts (
316
- self . view . cells_sequences ,
317
- usize:: try_from ( self . view . n_cells * self . view . n_seq_max )
318
- . expect ( "failed to fit n_cells * n_max_seq into usize" ) ,
319
- )
320
- }
321
- . chunks ( usize:: try_from ( self . view . n_seq_max ) . expect ( "failed to fit n_max_seq into usize" ) )
322
- }
323
- }
324
-
325
- impl Drop for KVCacheView < ' _ > {
326
- fn drop ( & mut self ) {
327
- unsafe {
328
- llama_cpp_sys_2:: llama_kv_cache_view_free ( & mut self . view ) ;
329
- }
210
+ unsafe { llama_cpp_sys_2:: llama_kv_self_update ( self . context . as_ptr ( ) ) }
330
211
}
331
212
}
0 commit comments