Skip to content

Commit 724fa67

Browse files
authored
perplexity.cpp : better way to deal with spm prepending space
1 parent d9a9b09 commit 724fa67

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

examples/perplexity/perplexity.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
391391
hs_data[i].context = prompt_lines[idx*6];
392392
hs_data[i].gold_ending_idx = std::stoi( prompt_lines[idx*6+1] );
393393
for (size_t j=0; j < 4; j++) {
394-
hs_data[i].ending[j] = " " + prompt_lines[idx*6+2+j];
394+
hs_data[i].ending[j] = prompt_lines[idx*6+2+j];
395395
}
396396

397397
// Delete the selected random example from the prompt
@@ -415,7 +415,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
415415

416416
// Do the 1st ending
417417
// In this case we include the context when evaluating
418-
auto query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[0], add_bos);
418+
auto query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + " " + hs_data[task_idx].ending[0], add_bos);
419419
auto query_size = query_embd.size();
420420
//printf("First query: %d\n",(int)query_size);
421421

@@ -462,11 +462,11 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
462462
for (size_t ending_idx = 1; ending_idx < 4; ending_idx++) {
463463

464464
// Tokenize the query
465-
// SPM tokenizer: Do not tokenize the starting space in the ending since it is always added by the tokenizer
465+
// SPM tokenizer: Do not prepend a space since the tokenizer always does that
466466
if (is_spm) {
467-
query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx].substr(1,hs_data[task_idx].ending[ending_idx].size()-1), false);
468-
} else {
469467
query_embd = ::llama_tokenize(ctx, hs_data[task_idx].ending[ending_idx], false);
468+
} else {
469+
query_embd = ::llama_tokenize(ctx, " " + hs_data[task_idx].ending[ending_idx], false);
470470
}
471471

472472
query_size = query_embd.size();

0 commit comments

Comments
 (0)