cmp-nct-org · cmp-nct · Nov 17, 2023 · Nov 17, 2023 · Nov 17, 2023 · Nov 19, 2023
diff --git a/common/common.cpp b/common/common.cpp
@@ -1024,12 +1024,12 @@ std::vector<llama_token> llama_tokenize(
     return result;
 }
 
-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
     if (n_tokens < 0) {
         result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
         GGML_ASSERT(check == -n_tokens);
     } else {
         result.resize(n_tokens);

diff --git a/common/common.h b/common/common.h
@@ -180,9 +180,10 @@ std::vector<llama_token> llama_tokenize(
 
 // tokenizes a token into a piece
 // should work similar to Python's `tokenizer.id_to_piece`
+// special = true also renders control/eos tokens as text; by default (false) they are omitted
 std::string llama_token_to_piece(
         const struct llama_context * ctx,
-                       llama_token   token);
+                       llama_token   token, bool special = false);
 
 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
 //       that takes into account the tokenizer type and decides how to handle the leading space

diff --git a/llama.cpp b/llama.cpp
@@ -1042,10 +1042,10 @@ static void ggml_offload_nop(struct ggml_tensor * tensor) {
 
 static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
     if (n_tokens < 0) {
         result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), false);
         GGML_ASSERT(check == -n_tokens);
     }
     else {
@@ -9338,10 +9338,15 @@ static std::string llama_decode_text(const std::string & text) {
 
 // does not write null-terminator to buf
 int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
+    return llama_token_to_piece(model, token, buf, length, false); // print_all_types = false reduces the new check to the previous normal-token-only test
+}
+
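+// does not write null-terminator to buf; print_all_types = true bypasses the normal-token check (NOTE(review): the default argument below makes 4-argument calls ambiguous with the overload above - consider dropping it)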
+int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length, bool print_all_types = false) {
     if (0 <= token && token < llama_n_vocab(model)) {
         switch (llama_vocab_get_type(model->vocab)) {
         case LLAMA_VOCAB_TYPE_SPM: {
-            if (llama_is_normal_token(model->vocab, token)) {
+            if (print_all_types || llama_is_normal_token(model->vocab, token)) {
                 std::string result = model->vocab.id_to_token[token].text;
                 llama_unescape_whitespace(result);
                 if (length < (int) result.length()) {
@@ -9371,7 +9376,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
             break;
         }
         case LLAMA_VOCAB_TYPE_BPE: {
-            if (llama_is_normal_token(model->vocab, token)) {
+            if (print_all_types || llama_is_normal_token(model->vocab, token)) {
                 std::string result = model->vocab.id_to_token[token].text;
                 result = llama_decode_text(result);
                 if (length < (int) result.length()) {

diff --git a/llama.h b/llama.h
@@ -550,7 +550,8 @@ extern "C" {
               const struct llama_model * model,
                            llama_token   token,
                                   char * buf,
-                                  int    length);
+                                  int    length,
+                                  bool   print_all_types);
 
     //
     // Grammar