Skip to content

Commit 6f7daba

Browse files
committed
Add simple test for special tokens
1 parent d9791bb commit 6f7daba

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

tests/test-tokenizer-0.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests()
14 14
{ " this is 🦙.cpp", { 1, 445, 338, 29871, 243, 162, 169, 156, 29889, 8223, }, },
15 15
{ "w048 7tuijk dsdfhu", { 1, 29893, 29900, 29946, 29947, 29871, 29955, 9161, 13535, 18031, 2176, 6905, }, },
16 16
{ "нещо на Български", { 1, 821, 4851, 665, 1386, 29713, 1305, }, },
17+
{ "<🦙>test extra_id_1 test", { 1, 32003, 1688, 29871, 32001, 259, 1243, }, },
18+
{ "<🦙>test extra_id_100 test", { 1, 32003, 1688, 29871, 32002, 1243, }, },
17 19
};
18 20
return _k_tests;
19 21
};
@@ -46,6 +48,10 @@ int main(int argc, char **argv) {
4648
return 1;
4749
}
4850

51+
llama_add_special_token(model, "extra_id_1", 32001);
52+
llama_add_special_token(model, "extra_id_100", 32002);
53+
llama_add_special_token(model, "<🦙>", 32003);
54+
49 55
ctx = llama_new_context_with_model(model, lparams);
50 56

51 57
if (ctx == NULL) {

0 commit comments

Comments
 (0)