
Commit 178d2b3

main: use separate stream for control characters (#4)
1 parent ea70e28 commit 178d2b3

5 files changed: +53 -7 lines changed

common/common.cpp

Lines changed: 5 additions & 0 deletions
@@ -905,6 +905,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.interactive_specials = true;
         return true;
     }
+    if (arg == "--no-special") {
+        params.no_special = true;
+        return true;
+    }
     if (arg == "--embedding") {
         params.embedding = true;
         return true;
@@ -1434,6 +1438,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" -i, --interactive run in interactive mode\n");
     printf(" --interactive-specials allow special tokens in user text, in interactive mode\n");
     printf(" --interactive-first run in interactive mode and wait for input right away\n");
+    printf(" --no-special control tokens output disabled\n");
     printf(" -cnv, --conversation run in conversation mode (does not print special tokens and suffix/prefix)\n");
     printf(" -ins, --instruct run in instruction mode (use with Alpaca models)\n");
     printf(" -cml, --chatml run in chatml mode (use with ChatML-compatible models)\n");

common/common.h

Lines changed: 1 addition & 0 deletions
@@ -142,6 +142,7 @@ struct gpt_params {
     bool use_color            = false; // use color to distinguish generations and inputs
     bool interactive          = false; // interactive mode
     bool interactive_specials = false; // whether to allow special tokens from user, during interactive mode
+    bool no_special           = false; // disable control token output
     bool conversation         = false; // conversation mode (does not print special tokens and suffix/prefix)
     bool chatml               = false; // chatml mode (used for models trained on chatml syntax)
     bool prompt_cache_all     = false; // save user input and generations to prompt cache

examples/main/main.cpp

Lines changed: 40 additions & 7 deletions
@@ -18,6 +18,8 @@
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
 #include <signal.h>
 #include <unistd.h>
+#include <fcntl.h>
+#define SPECIAL_FILENO 3
 #elif defined (_WIN32)
 #define WIN32_LEAN_AND_MEAN
 #ifndef NOMINMAX
@@ -118,6 +120,16 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v
 }

 int main(int argc, char ** argv) {
+#ifndef _MSC_VER
+    // Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. bash `3>/dev/null` )
+    // Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files
+    const bool control_token_file_descriptor_is_attached = fcntl(SPECIAL_FILENO, F_GETFL) != -1;
+    if (!control_token_file_descriptor_is_attached) {
+        // Duplicate stdout file descriptor to control token file descriptor to merge the two streams
+        dup2(STDOUT_FILENO, SPECIAL_FILENO);
+    }
+#endif
+
     gpt_params params;
     g_params = &params;

@@ -126,6 +138,8 @@ int main(int argc, char ** argv) {
     }
     llama_sampling_params & sparams = params.sparams;

+    const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty();
+
 #ifndef LOG_DISABLE_LOGS
     log_set_target(log_filename_generator("main", "log"));
     LOG_TEE("Log start\n");
@@ -528,8 +542,6 @@ int main(int argc, char ** argv) {
         exit(1);
     }

-    bool should_show_special_tokens = sparams.grammar.empty();
-
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
         if (!embd.empty()) {
@@ -742,18 +754,39 @@ int main(int argc, char ** argv) {
         // display text
         if (input_echo && display) {
             for (auto id : embd) {
-                const std::string token_str = llama_token_to_piece(ctx, id, !params.conversation && should_show_special_tokens);
-                printf("%s", token_str.c_str());
-
+                const std::string token_str = llama_token_to_piece(ctx, id);
+
+                // Console/Stream Output
+                if (!llama_token_is_control_token(llama_get_model(ctx), id)) {
+                    // Stream Output Token To Standard Output
+                    fprintf(stdout, "%s", token_str.c_str());
+                } else if (!params.no_special) {
+#ifndef _MSC_VER
+                    if (control_token_file_descriptor_is_attached) {
+                        // Stream Control Token To Special Token Output. Useful for debugging control token behaviour
+                        (void)! write(SPECIAL_FILENO, token_str.c_str(), token_str.length());
+                    } else
+#endif
+                    if (control_token_allowed_on_standard_stream)
+                    {
+                        // Stream Control Token To Standard Output Stream
+                        fprintf(stdout, "%s", token_str.c_str());
+                    }
+                }
+                // Record Displayed Tokens To Log
+                // Note: Generated tokens are created one by one hence this check
                 if (embd.size() > 1) {
+                    // Incoming Requested Tokens
                     input_tokens.push_back(id);
                 } else {
+                    // Outgoing Generated Tokens
                     output_tokens.push_back(id);
                     output_ss << token_str;
                 }
+                fflush(stdout);
             }
-            fflush(stdout);
         }
+
         // reset color to default if there is no pending user input
         if (input_echo && (int) embd_inp.size() == n_consumed) {
             console::set_display(console::reset);
@@ -908,7 +941,7 @@ int main(int argc, char ** argv) {
            for (size_t i = original_size; i < embd_inp.size(); ++i) {
                const llama_token token = embd_inp[i];
                output_tokens.push_back(token);
-               output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens);
+               output_ss << llama_token_to_piece(ctx, token);
            }

            n_remain -= line_inp.size();
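
With this change, control tokens are written to file descriptor 3 (SPECIAL_FILENO) whenever something is attached to it, so a shell user can split the streams with e.g. `./main -m model.gguf -p "..." 3>control.txt`, or discard control tokens with `3>/dev/null` as the comment above suggests; when nothing is attached, fd 3 is dup2'd onto stdout and the streams remain merged. For programmatic consumers, the following is a minimal POSIX sketch, not part of this commit, of a parent process that wires fd 3 of the example binary to a pipe and reads the control-token bytes separately; the binary path, model path, and prompt are placeholders.

#include <cstdio>
#include <unistd.h>
#include <sys/wait.h>

int main() {
    int fds[2];
    if (pipe(fds) != 0) { perror("pipe"); return 1; }

    const pid_t pid = fork();
    if (pid == 0) {
        // Child: expose the pipe's write end as fd 3 (SPECIAL_FILENO), then run the example binary.
        close(fds[0]);
        dup2(fds[1], 3);
        close(fds[1]);
        execlp("./main", "./main", "-m", "model.gguf", "-p", "Hello", (char *) nullptr);
        perror("execlp");
        _exit(1);
    }

    // Parent: normal text still flows to the child's stdout; control tokens arrive on the pipe.
    close(fds[1]);
    char buf[256];
    ssize_t n;
    while ((n = read(fds[0], buf, sizeof(buf))) > 0) {
        fprintf(stderr, "[control] %.*s\n", (int) n, buf);
    }
    close(fds[0]);
    waitpid(pid, nullptr, 0);
    return 0;
}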

llama.cpp

Lines changed: 4 additions & 0 deletions
@@ -17634,6 +17634,10 @@ bool llama_token_is_eog(const struct llama_model * model, llama_token token) {
     );
 }

+bool llama_token_is_control_token(const struct llama_model * model, llama_token token) {
+    return llama_is_control_token(model->vocab, token);
+}
+
 llama_token llama_token_bos(const struct llama_model * model) {
     return model->vocab.special_bos_id;
 }

llama.h

Lines changed: 3 additions & 0 deletions
@@ -816,6 +816,9 @@ extern "C" {
     // Check if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)
     LLAMA_API bool llama_token_is_eog(const struct llama_model * model, llama_token token);

+    // Identify if Token Id is a control token or a render-able token
+    LLAMA_API bool llama_token_is_control_token(const struct llama_model * model, llama_token token);
+
     // Special tokens
     LLAMA_API llama_token llama_token_bos(const struct llama_model * model); // beginning-of-sentence
     LLAMA_API llama_token llama_token_eos(const struct llama_model * model); // end-of-sentence
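
For code built on top of the library, the new predicate can be paired with the token-to-piece conversion to decide where each decoded piece should be written, much as the main example above does. A minimal sketch, assuming a loaded llama_context and the std::string llama_token_to_piece(ctx, token) convenience helper from common/common.h (the include path is illustrative):

#include <cstdio>
#include <string>

#include "llama.h"
#include "common.h" // assumed include path for the common helpers

// Route one decoded token either to the text stream or to a separate control stream.
// Mirrors the routing added to examples/main/main.cpp; the caller chooses the streams.
static void emit_token(llama_context * ctx, llama_token id, FILE * text_out, FILE * control_out) {
    const std::string piece = llama_token_to_piece(ctx, id);
    if (llama_token_is_control_token(llama_get_model(ctx), id)) {
        fprintf(control_out, "%s", piece.c_str());
    } else {
        fprintf(text_out, "%s", piece.c_str());
    }
}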
