Add simple test for special tokens

Igoorx · Igoorx · commit 6f7dabab4415 · 2023-08-07T17:31:13.000-03:00
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
@@ -14,6 +14,8 @@ static const std::map<std::string, std::vector<llama_token>> & k_tests()
         { " this is 🦙.cpp",    { 1,    445,    338,  29871,    243,    162,    169,    156,  29889,   8223, }, },
         { "w048 7tuijk dsdfhu", { 1,  29893,  29900,  29946,  29947,  29871,  29955,   9161,  13535,  18031,   2176,   6905, }, },
         { "нещо на Български",  { 1,    821,   4851,    665,   1386,  29713,   1305, }, },
+        { "<🦙>test extra_id_1   test",  { 1, 32003,  1688,  29871,  32001,    259,   1243, }, },
+        { "<🦙>test extra_id_100 test",  { 1, 32003,  1688,  29871,  32002,   1243, }, },
     };
     return _k_tests;
 };
@@ -46,6 +48,10 @@ int main(int argc, char **argv) {
             return 1;
         }
 
+        llama_add_special_token(model, "extra_id_1", 32001);
+        llama_add_special_token(model, "extra_id_100", 32002);
+        llama_add_special_token(model, "<🦙>", 32003);
+
         ctx = llama_new_context_with_model(model, lparams);
 
         if (ctx == NULL) {