Skip to content

Commit 1170135

Browse files
committed
llama_batch_ext_add_text
1 parent 40989f4 commit 1170135

File tree

4 files changed: +10 −10 lines

common/speculative.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ llama_tokens common_speculative_gen_draft(
209209

210210
for (size_t i = i_start + reuse_n; i < prompt_tgt.size(); ++i) {
211211
//LOG_DBG("i = %d, i_start = %d, reuse_n = %d, i - i_start = %d, id = %6d\n", i, i_start, reuse_n, i - i_start, prompt_tgt[i]);
212-
llama_batch_ext_add_text_token(batch.get(), prompt_tgt[i], i - i_start, &seq_id, 1, false);
212+
llama_batch_ext_add_text(batch.get(), prompt_tgt[i], i - i_start, &seq_id, 1, false);
213213

214214
prompt.push_back(prompt_tgt[i]);
215215
}
@@ -226,7 +226,7 @@ llama_tokens common_speculative_gen_draft(
226226
LOG_DBG("%s: n_past = %d\n", __func__, n_past);
227227

228228
llama_batch_ext_clear(batch.get());
229-
llama_batch_ext_add_text_token(batch.get(), id_last, n_past, &seq_id, 1, true);
229+
llama_batch_ext_add_text(batch.get(), id_last, n_past, &seq_id, 1, true);
230230

231231
prompt.push_back(id_last);
232232

@@ -265,7 +265,7 @@ llama_tokens common_speculative_gen_draft(
265265
break;
266266
}
267267

268-
llama_batch_ext_add_text_token(batch.get(), id, n_past + i + 1, &seq_id, 1, true);
268+
llama_batch_ext_add_text(batch.get(), id, n_past + i + 1, &seq_id, 1, true);
269269

270270
// evaluate the drafted tokens on the draft model
271271
llama_decode_ext(ctx, batch.get());

examples/server/server.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2849,7 +2849,7 @@ struct server_context {
28492849
slot.i_batch = llama_batch_ext_get_n_tokens(batch.get());
28502850

28512851
std::array<llama_token, 1> seq_id = { slot.id };
2852-
llama_batch_ext_add_text_token(batch.get(), slot.sampled, slot.n_past, seq_id.data(), seq_id.size(), true);
2852+
llama_batch_ext_add_text(batch.get(), slot.sampled, slot.n_past, seq_id.data(), seq_id.size(), true);
28532853

28542854
slot.n_past += 1;
28552855

@@ -3057,7 +3057,7 @@ struct server_context {
30573057
const bool need_embd = slot.task_type == SERVER_TASK_TYPE_EMBEDDING && llama_pooling_type(slot.ctx) == LLAMA_POOLING_TYPE_NONE;
30583058

30593059
std::array<llama_token, 1> seq_id = { slot.id };
3060-
llama_batch_ext_add_text_token(batch.get(), prompt_tokens[slot.n_past], slot.n_past, seq_id.data(), seq_id.size(), need_embd);
3060+
llama_batch_ext_add_text(batch.get(), prompt_tokens[slot.n_past], slot.n_past, seq_id.data(), seq_id.size(), need_embd);
30613061

30623062
if (slot.params.cache_prompt) {
30633063
slot.cache_tokens.push_back(prompt_tokens[slot.n_past]);
@@ -3255,10 +3255,10 @@ struct server_context {
32553255
// construct the speculation batch
32563256
llama_batch_ext_clear(slot.batch_spec.get());
32573257
std::array<llama_token, 1> seq_id = { slot.id };
3258-
llama_batch_ext_add_text_token(slot.batch_spec.get(), id, slot.n_past, seq_id.data(), seq_id.size(), true);
3258+
llama_batch_ext_add_text(slot.batch_spec.get(), id, slot.n_past, seq_id.data(), seq_id.size(), true);
32593259

32603260
for (size_t i = 0; i < draft.size(); ++i) {
3261-
llama_batch_ext_add_text_token(slot.batch_spec.get(), draft[i], slot.n_past + 1, seq_id.data(), seq_id.size(), true);
3261+
llama_batch_ext_add_text(slot.batch_spec.get(), draft[i], slot.n_past + 1, seq_id.data(), seq_id.size(), true);
32623262
}
32633263

32643264
SLT_DBG(slot, "decoding speculative batch, size = %d\n", llama_batch_ext_get_n_tokens(slot.batch_spec.get()));

include/llama.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -905,7 +905,7 @@ extern "C" {
905905
// 0 : success
906906
// -1 : not enough space in the batch
907907
// -2 : embd is already set, cannot add text tokens
908-
LLAMA_API int32_t llama_batch_ext_add_text_token(
908+
LLAMA_API int32_t llama_batch_ext_add_text(
909909
struct llama_batch_ext * batch,
910910
llama_token token,
911911
llama_pos pos,

src/llama-batch.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ struct llama_batch_ext * llama_batch_ext_init_from_text(
344344
int32_t seq_id) {
345345
llama_batch_ext * batch = llama_batch_ext_init(n_tokens, 1);
346346
for (int32_t i = 0; i < n_tokens; i++) {
347-
llama_batch_ext_add_text_token(batch, tokens[i], pos0 + i, &seq_id, 1, false);
347+
llama_batch_ext_add_text(batch, tokens[i], pos0 + i, &seq_id, 1, false);
348348
}
349349
return batch;
350350
}
@@ -404,7 +404,7 @@ int32_t llama_batch_ext_get_n_tokens(const struct llama_batch_ext * batch) {
404404
return batch->n_tokens;
405405
}
406406

407-
int32_t llama_batch_ext_add_text_token(
407+
int32_t llama_batch_ext_add_text(
408408
struct llama_batch_ext * batch,
409409
llama_token token,
410410
llama_pos pos,

0 commit comments