From bcd24f8974c9bfeacd35eaf244bc1ed4e6a0a36c Mon Sep 17 00:00:00 2001 From: brian khuu Date: Mon, 20 May 2024 01:57:43 +1000 Subject: [PATCH 01/10] main: use separate stream for control characters --- examples/main/main.cpp | 36 ++++++++++++++++++++++++++++++++---- llama.cpp | 4 ++++ llama.h | 3 +++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 8153a71fb5791..5c453a57ed605 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -528,7 +528,20 @@ int main(int argc, char ** argv) { exit(1); } - bool should_show_special_tokens = sparams.grammar.empty(); + // Create the pipe for special token handling + int stok_pipe[2] = {0}; + if (pipe(stok_pipe) == -1) { + fprintf(stderr, "%s: failed to initialize special token output stream\n", __func__); + exit(1); + } + + close(stok_pipe[0]); // Read Special Token Not In Use + + FILE *special_token_stream_output_fd = fdopen(stok_pipe[1], "w"); + if (special_token_stream_output_fd == NULL) { + fprintf(stderr, "%s: failed to open special token output stream\n", __func__); + exit(1); + } while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict @@ -742,18 +755,31 @@ int main(int argc, char ** argv) { // display text if (input_echo && display) { for (auto id : embd) { - const std::string token_str = llama_token_to_piece(ctx, id, !params.conversation && should_show_special_tokens); - printf("%s", token_str.c_str()); + const std::string token_str = llama_token_to_piece(ctx, id); + // Console/Stream Output + if (llama_token_is_control_token(llama_get_model(ctx), id)) { + // Stream Output Token To Special Token Output + fprintf(special_token_stream_output_fd, "%s", token_str.c_str()); + } else { + // Stream Output Token To Standard Output + fprintf(stdout, "%s", token_str.c_str()); + } + + // Record Displayed Tokens To Log + // Note: Generated tokens are created one by one hence this check if (embd.size() > 1) { + // Incoming Requested
Tokens input_tokens.push_back(id); } else { + // Outgoing Generated Tokens output_tokens.push_back(id); output_ss << token_str; } } fflush(stdout); } + // reset color to default if there is no pending user input if (input_echo && (int) embd_inp.size() == n_consumed) { console::set_display(console::reset); @@ -908,7 +934,7 @@ int main(int argc, char ** argv) { for (size_t i = original_size; i < embd_inp.size(); ++i) { const llama_token token = embd_inp[i]; output_tokens.push_back(token); - output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens); + output_ss << llama_token_to_piece(ctx, token); } n_remain -= line_inp.size(); @@ -957,6 +983,8 @@ int main(int argc, char ** argv) { llama_sampling_free(ctx_sampling); llama_backend_free(); + fclose(special_token_stream_output_fd); + #ifndef LOG_DISABLE_LOGS LOG_TEE("Log end\n"); #endif // LOG_DISABLE_LOGS diff --git a/llama.cpp b/llama.cpp index b752ddc6b401f..f41c6e5b68192 100644 --- a/llama.cpp +++ b/llama.cpp @@ -17634,6 +17634,10 @@ bool llama_token_is_eog(const struct llama_model * model, llama_token token) { ); } +bool llama_token_is_control_token(const struct llama_model * model, llama_token token) { + return llama_is_control_token(model->vocab, token); +} + llama_token llama_token_bos(const struct llama_model * model) { return model->vocab.special_bos_id; } diff --git a/llama.h b/llama.h index 612e32c4ea058..7cacb3d645a40 100644 --- a/llama.h +++ b/llama.h @@ -816,6 +816,9 @@ extern "C" { // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.) 
LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token); + // Identify if Token Id is a control token or a render-able token + LLAMA_API bool llama_token_is_control_token(const struct llama_model * model, llama_token token); + // Special tokens LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence From 9f445a793d93dc9d4cee5097e83c179d0cddc34b Mon Sep 17 00:00:00 2001 From: brian khuu Date: Mon, 20 May 2024 22:35:08 +1000 Subject: [PATCH 02/10] main: use dprintf and add --ctrl-token-no-out and --ctrl-token-fd-out --- common/common.cpp | 13 ++++++++++++- common/common.h | 2 ++ examples/main/main.cpp | 36 +++++++++++++++--------------------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index e624fc7f35352..4cc482a302904 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -905,6 +905,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.interactive_specials = true; return true; } + if (arg == "--ctrl-token-no-out") { + params.ctrl_token_no_out = true; + return true; + } + if (arg == "--ctrl-token-fd-out") { + params.ctrl_token_fd_out = true; + return true; + } if (arg == "--embedding") { params.embedding = true; return true; @@ -1433,7 +1441,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --version show version and build info\n"); printf(" -i, --interactive run in interactive mode\n"); printf(" --interactive-specials allow special tokens in user text, in interactive mode\n"); - printf(" --interactive-first run in interactive mode and wait for input right away\n"); + printf(" --ctrl-token-no-out control tokens output disabled\n"); +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + printf(" --ctrl-token-fd-out control tokens sent to file 
descriptor 3 out of band\n"); +#endif printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n"); printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n"); diff --git a/common/common.h b/common/common.h index 566490e2f881a..20a776f6bf40b 100644 --- a/common/common.h +++ b/common/common.h @@ -142,6 +142,8 @@ struct gpt_params { bool use_color = false; // use color to distinguish generations and inputs bool interactive = false; // interactive mode bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode + bool ctrl_token_no_out = false; // disable control token output + bool ctrl_token_fd_out = false; // enable control token output and redirect it to file descriptor 3 bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix) bool chatml = false; // chatml mode (used for models trained on chatml syntax) bool prompt_cache_all = false; // save user input and generations to prompt cache diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 5c453a57ed605..8effd02a8bdc0 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -18,6 +18,7 @@ #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #include #include +#define CONTROL_TOKEN_FILE_DESCRIPTOR (3) #elif defined (_WIN32) #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX @@ -528,21 +529,6 @@ int main(int argc, char ** argv) { exit(1); } - // Create the pipe for special token handling - int stok_pipe[2] = {0}; - if (pipe(stok_pipe) == -1) { - fprintf(stderr, "%s: failed to initialize special token output stream\n", __func__); - exit(1); - } - - close(stok_pipe[0]); // Read Special Token Not In Use - - FILE *special_token_stream_output_fd = fdopen(stok_pipe[1], "w"); - if (special_token_stream_output_fd == NULL) { - fprintf(stderr, "%s: failed to 
open special token output stream\n", __func__); - exit(1); - } - while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if (!embd.empty()) { @@ -758,12 +744,22 @@ int main(int argc, char ** argv) { const std::string token_str = llama_token_to_piece(ctx, id); // Console/Stream Output - if (llama_token_is_control_token(llama_get_model(ctx), id)) { - // Stream Output Token To Special Token Output - fprintf(special_token_stream_output_fd, "%s", token_str.c_str()); - } else { + if (!llama_token_is_control_token(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output fprintf(stdout, "%s", token_str.c_str()); + } else if (!params.ctrl_token_no_out) { +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) + if (params.ctrl_token_fd_out) { + // Stream Control Token To Special Token Output. Useful for debugging control token behaviour + dprintf(CONTROL_TOKEN_FILE_DESCRIPTOR, "%s", token_str.c_str()); + } + else +#endif + if (!params.conversation && sparams.grammar.empty()) + { + // Stream Control Token To Standard Output as long as we are not in a conversation or grammar output + fprintf(stdout, "%s", token_str.c_str()); + } } // Record Displayed Tokens To Log @@ -983,8 +979,6 @@ int main(int argc, char ** argv) { llama_sampling_free(ctx_sampling); llama_backend_free(); - fclose(special_token_stream_output_fd); - #ifndef LOG_DISABLE_LOGS LOG_TEE("Log end\n"); #endif // LOG_DISABLE_LOGS From ad4b6097c0014e94ff80fa8b5592c05f5964a832 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 00:37:12 +1000 Subject: [PATCH 03/10] main: dprintf isn't part of the IEEE POSIX standard. Just use write(). 
--- examples/main/main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 8effd02a8bdc0..4019485124029 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -748,10 +748,11 @@ int main(int argc, char ** argv) { // Stream Output Token To Standard Output fprintf(stdout, "%s", token_str.c_str()); } else if (!params.ctrl_token_no_out) { -#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) +#ifndef _MSC_VER if (params.ctrl_token_fd_out) { // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - dprintf(CONTROL_TOKEN_FILE_DESCRIPTOR, "%s", token_str.c_str()); + ssize_t result = write(CONTROL_TOKEN_FILE_DESCRIPTOR, token_str.c_str(), token_str.length()); + (void) result; } else #endif From c9ea9df7fbc04747772fb7e765ba8c9a35a9f00c Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 04:44:50 +1000 Subject: [PATCH 04/10] main: remove --ctrl-token-fd-out in favor for fcntl() based detection --- common/common.cpp | 7 ------- common/common.h | 1 - examples/main/main.cpp | 29 +++++++++++++++++++---------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 4cc482a302904..b256eef9e27cc 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -909,10 +909,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.ctrl_token_no_out = true; return true; } - if (arg == "--ctrl-token-fd-out") { - params.ctrl_token_fd_out = true; - return true; - } if (arg == "--embedding") { params.embedding = true; return true; @@ -1442,9 +1438,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" -i, --interactive run in interactive mode\n"); printf(" --interactive-specials allow special tokens in user text, in interactive mode\n"); printf(" --ctrl-token-no-out control tokens output disabled\n"); -#if defined 
(__unix__) || (defined (__APPLE__) && defined (__MACH__)) - printf(" --ctrl-token-fd-out control tokens sent to file descriptor 3 out of band\n"); -#endif printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n"); printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n"); diff --git a/common/common.h b/common/common.h index 20a776f6bf40b..f7556cfec16b8 100644 --- a/common/common.h +++ b/common/common.h @@ -143,7 +143,6 @@ struct gpt_params { bool interactive = false; // interactive mode bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode bool ctrl_token_no_out = false; // disable control token output - bool ctrl_token_fd_out = false; // enable control token output and redirect it to file descriptor 3 bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix) bool chatml = false; // chatml mode (used for models trained on chatml syntax) bool prompt_cache_all = false; // save user input and generations to prompt cache diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 4019485124029..aded5bbf1eb54 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -18,7 +18,8 @@ #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) #include #include -#define CONTROL_TOKEN_FILE_DESCRIPTOR (3) +#include +#define CONTROL_TOKEN_FILENO (3) #elif defined (_WIN32) #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX @@ -529,6 +530,14 @@ int main(int argc, char ** argv) { exit(1); } +#ifndef _MSC_VER + if (fcntl(CONTROL_TOKEN_FILENO, F_GETFL) == -1) { + // Control Token File Descriptor has nothing attached to it + // make control token file descriptor be an alias of stdout + dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); + } +#endif + while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if 
(!embd.empty()) { @@ -746,23 +755,23 @@ int main(int argc, char ** argv) { // Console/Stream Output if (!llama_token_is_control_token(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output + fflush(stdout); fprintf(stdout, "%s", token_str.c_str()); } else if (!params.ctrl_token_no_out) { + if (!params.conversation && sparams.grammar.empty()) + { + // Stream Control Token To Special Token Output. Useful for debugging control token behaviour + fflush(stdout); + fprintf(stdout, "%s", token_str.c_str()); + } #ifndef _MSC_VER - if (params.ctrl_token_fd_out) { + else { // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - ssize_t result = write(CONTROL_TOKEN_FILE_DESCRIPTOR, token_str.c_str(), token_str.length()); + ssize_t result = write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); (void) result; } - else #endif - if (!params.conversation && sparams.grammar.empty()) - { - // Stream Control Token To Standard Output as long as we are not in a conversation or grammar output - fprintf(stdout, "%s", token_str.c_str()); - } } - // Record Displayed Tokens To Log // Note: Generated tokens are created one by one hence this check if (embd.size() > 1) { From 5032f18f204f0e8f645da3b4607e071bea585b07 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 04:47:49 +1000 Subject: [PATCH 05/10] common.cpp: accidentally removed --interactive-first --- common/common.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/common/common.cpp b/common/common.cpp index b256eef9e27cc..2a6990f3474b2 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1437,6 +1437,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --version show version and build info\n"); printf(" -i, --interactive run in interactive mode\n"); printf(" --interactive-specials allow special tokens in user text, in interactive mode\n"); + printf(" --interactive-first run in interactive mode and wait 
for input right away\n"); printf(" --ctrl-token-no-out control tokens output disabled\n"); printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n"); printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); From 90456a5717ca6ab20750eba147d692ee4cbfb2f0 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 04:57:26 +1000 Subject: [PATCH 06/10] main: only merge stdout and control token if not in conversation or grammar mode --- examples/main/main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index aded5bbf1eb54..66f6695b7ab1f 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -531,9 +531,10 @@ int main(int argc, char ** argv) { } #ifndef _MSC_VER - if (fcntl(CONTROL_TOKEN_FILENO, F_GETFL) == -1) { - // Control Token File Descriptor has nothing attached to it - // make control token file descriptor be an alias of stdout + const bool control_token_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; + if (!control_token_descriptor_is_attached && !params.conversation && sparams.grammar.empty()) { + // Control Token File Descriptor has nothing attached to it so make control token file descriptor be an alias of stdout + // This is not done however if we are in conversation mode or grammar mode as that is typically discarded dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); } #endif From 50048f5b45ee57fc0c1d233ea6bc08e801ccbe13 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 11:20:00 +1000 Subject: [PATCH 07/10] main: rejig control token descriptor handling --- examples/main/main.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 66f6695b7ab1f..cbc65ba080b79 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -530,9 +530,11 @@ int main(int argc, char ** argv) { 
exit(1); } + const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); + #ifndef _MSC_VER const bool control_token_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; - if (!control_token_descriptor_is_attached && !params.conversation && sparams.grammar.empty()) { + if (control_token_allowed_on_standard_stream && !control_token_descriptor_is_attached) { // Control Token File Descriptor has nothing attached to it so make control token file descriptor be an alias of stdout // This is not done however if we are in conversation mode or grammar mode as that is typically discarded dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); @@ -759,19 +761,19 @@ int main(int argc, char ** argv) { fflush(stdout); fprintf(stdout, "%s", token_str.c_str()); } else if (!params.ctrl_token_no_out) { - if (!params.conversation && sparams.grammar.empty()) - { - // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - fflush(stdout); - fprintf(stdout, "%s", token_str.c_str()); - } #ifndef _MSC_VER - else { + if (control_token_descriptor_is_attached) { // Stream Control Token To Special Token Output. 
Useful for debugging control token behaviour ssize_t result = write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); (void) result; - } + } else #endif + if (control_token_allowed_on_standard_stream) + { + // Stream Control Token To Standard Output Stream + fflush(stdout); + fprintf(stdout, "%s", token_str.c_str()); + } } // Record Displayed Tokens To Log // Note: Generated tokens are created one by one hence this check From c1e8a6d1c03a44eaf90fb120a102f5d7ea1c6e98 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 14:58:34 +1000 Subject: [PATCH 08/10] main: must check pipe status on very top of program --- examples/main/main.cpp | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index cbc65ba080b79..d997c713e2649 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -120,6 +120,12 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v } int main(int argc, char ** argv) { +#ifndef _MSC_VER + // Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. 
bash `3>/dev/null` ) + // Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files + const bool control_token_file_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; +#endif + gpt_params params; g_params = &params; @@ -128,6 +134,16 @@ int main(int argc, char ** argv) { } llama_sampling_params & sparams = params.sparams; + const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); + +#ifndef _MSC_VER + // Merge normal token stream and control token streams together only if not in conversation or grammar mode + if (control_token_allowed_on_standard_stream && !control_token_file_descriptor_is_attached) { + // Duplicate stdout file descriptor to control token file descriptor to merge the two streams + dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); + } +#endif + #ifndef LOG_DISABLE_LOGS log_set_target(log_filename_generator("main", "log")); LOG_TEE("Log start\n"); @@ -530,17 +546,6 @@ int main(int argc, char ** argv) { exit(1); } - const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); - -#ifndef _MSC_VER - const bool control_token_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; - if (control_token_allowed_on_standard_stream && !control_token_descriptor_is_attached) { - // Control Token File Descriptor has nothing attached to it so make control token file descriptor be an alias of stdout - // This is not done however if we are in conversation mode or grammar mode as that is typically discarded - dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); - } -#endif - while ((n_remain != 0 && !is_antiprompt) || params.interactive) { // predict if (!embd.empty()) { @@ -758,20 +763,18 @@ int main(int argc, char ** argv) { // Console/Stream Output if (!llama_token_is_control_token(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output - fflush(stdout); fprintf(stdout, "%s", token_str.c_str()); } else if
(!params.ctrl_token_no_out) { #ifndef _MSC_VER - if (control_token_descriptor_is_attached) { + if (control_token_file_descriptor_is_attached) { // Stream Control Token To Special Token Output. Useful for debugging control token behaviour - ssize_t result = write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); - (void) result; + fflush(stdout); // Ensure control token is always appended to stdout stream + (void)! write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); } else #endif if (control_token_allowed_on_standard_stream) { // Stream Control Token To Standard Output Stream - fflush(stdout); fprintf(stdout, "%s", token_str.c_str()); } } From 7d52482bacf2eed07fbe5c18c64001892aab8c00 Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 16:00:59 +1000 Subject: [PATCH 09/10] main: renamed --no-special from --ctrl-token-no-out and other refactoring --- common/common.cpp | 4 ++-- examples/main/main.cpp | 21 ++++++++------------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 2a6990f3474b2..a026fe50935b9 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -905,7 +905,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa params.interactive_specials = true; return true; } - if (arg == "--ctrl-token-no-out") { + if (arg == "--no-special") { params.ctrl_token_no_out = true; return true; } @@ -1438,7 +1438,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" -i, --interactive run in interactive mode\n"); printf(" --interactive-specials allow special tokens in user text, in interactive mode\n"); printf(" --interactive-first run in interactive mode and wait for input right away\n"); - printf(" --ctrl-token-no-out control tokens output disabled\n"); + printf(" --no-special control tokens output disabled\n"); printf(" -cnv, --conversation run in conversation mode (does not print special tokens and 
suffix/prefix)\n"); printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n"); printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n"); diff --git a/examples/main/main.cpp b/examples/main/main.cpp index d997c713e2649..d5288a5e62a65 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -19,7 +19,7 @@ #include #include #include -#define CONTROL_TOKEN_FILENO (3) +#define SPECIAL_FILENO 3 #elif defined (_WIN32) #define WIN32_LEAN_AND_MEAN #ifndef NOMINMAX @@ -123,7 +123,11 @@ int main(int argc, char ** argv) { #ifndef _MSC_VER // Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. bash `3>/dev/null` ) // Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files - const bool control_token_file_descriptor_is_attached = fcntl(CONTROL_TOKEN_FILENO, F_GETFL) != -1; + const bool control_token_file_descriptor_is_attached = fcntl(SPECIAL_FILENO, F_GETFL) != -1; + if (!control_token_file_descriptor_is_attached) { + // Duplicate stdout file descriptor to control token file descriptor to merge the two streams + dup2(STDOUT_FILENO, SPECIAL_FILENO); + } #endif gpt_params params; @@ -136,14 +140,6 @@ int main(int argc, char ** argv) { const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); -#ifndef _MSC_VER - // Merge normal token stream and control token streams together only if not in conversation or grammar mode - if (control_token_allowed_on_standard_stream && !control_token_file_descriptor_is_attached) { - // Duplicate stdout file descriptor to control token file descriptor to merge the two streams - dup2(STDOUT_FILENO, CONTROL_TOKEN_FILENO); - } -#endif - #ifndef LOG_DISABLE_LOGS log_set_target(log_filename_generator("main", "log")); LOG_TEE("Log start\n"); @@ -768,8 +764,7 @@ int main(int argc, char ** argv) { #ifndef _MSC_VER if (control_token_file_descriptor_is_attached) { // Stream Control 
Token To Special Token Output. Useful for debugging control token behaviour - fflush(stdout); // Ensure control token is always appended to stdout stream - (void)! write(CONTROL_TOKEN_FILENO, token_str.c_str(), token_str.length()); + (void)! write(SPECIAL_FILENO, token_str.c_str(), token_str.length()); } else #endif if (control_token_allowed_on_standard_stream) @@ -788,8 +783,8 @@ int main(int argc, char ** argv) { output_tokens.push_back(id); output_ss << token_str; } + fflush(stdout); } - fflush(stdout); } // reset color to default if there is no pending user input From 8f76ba54bac54a4fe8b33f570740dac61a08426f Mon Sep 17 00:00:00 2001 From: brian khuu Date: Tue, 21 May 2024 16:03:18 +1000 Subject: [PATCH 10/10] main: refactor ctrl_token_no_out --> no_special --- common/common.cpp | 2 +- common/common.h | 2 +- examples/main/main.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index a026fe50935b9..ffc98137dbab8 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -906,7 +906,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa return true; } if (arg == "--no-special") { - params.ctrl_token_no_out = true; + params.no_special = true; return true; } if (arg == "--embedding") { diff --git a/common/common.h b/common/common.h index f7556cfec16b8..87e771ca10a02 100644 --- a/common/common.h +++ b/common/common.h @@ -142,7 +142,7 @@ struct gpt_params { bool use_color = false; // use color to distinguish generations and inputs bool interactive = false; // interactive mode bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode - bool ctrl_token_no_out = false; // disable control token output + bool no_special = false; // disable control token output bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix) bool chatml = false; // chatml mode (used for models trained on chatml syntax) bool 
prompt_cache_all = false; // save user input and generations to prompt cache diff --git a/examples/main/main.cpp b/examples/main/main.cpp index d5288a5e62a65..fc54861811184 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -760,7 +760,7 @@ int main(int argc, char ** argv) { if (!llama_token_is_control_token(llama_get_model(ctx), id)) { // Stream Output Token To Standard Output fprintf(stdout, "%s", token_str.c_str()); - } else if (!params.ctrl_token_no_out) { + } else if (!params.no_special) { #ifndef _MSC_VER if (control_token_file_descriptor_is_attached) { // Stream Control Token To Special Token Output. Useful for debugging control token behaviour