18
18
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
19
19
#include < signal.h>
20
20
#include < unistd.h>
21
+ #include < fcntl.h>
22
+ #define SPECIAL_FILENO 3
21
23
#elif defined (_WIN32)
22
24
#define WIN32_LEAN_AND_MEAN
23
25
#ifndef NOMINMAX
@@ -118,6 +120,16 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v
118
120
}
119
121
120
122
int main (int argc, char ** argv) {
123
+ #ifndef _MSC_VER
124
+ // Check whether a file descriptor for out-of-band control tokens has been attached externally (e.g. bash `3>/dev/null`)
125
+ // Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files
126
+ const bool control_token_file_descriptor_is_attached = fcntl (SPECIAL_FILENO, F_GETFL) != -1 ;
127
+ if (!control_token_file_descriptor_is_attached) {
128
+ // Duplicate stdout file descriptor to control token file descriptor to merge the two streams
129
+ dup2 (STDOUT_FILENO, SPECIAL_FILENO);
130
+ }
131
+ #endif
132
+
121
133
gpt_params params;
122
134
g_params = ¶ms;
123
135
@@ -126,6 +138,8 @@ int main(int argc, char ** argv) {
126
138
}
127
139
llama_sampling_params & sparams = params.sparams ;
128
140
141
+ const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar .empty ();
142
+
129
143
#ifndef LOG_DISABLE_LOGS
130
144
log_set_target (log_filename_generator (" main" , " log" ));
131
145
LOG_TEE (" Log start\n " );
@@ -528,8 +542,6 @@ int main(int argc, char ** argv) {
528
542
exit (1 );
529
543
}
530
544
531
- bool should_show_special_tokens = sparams.grammar .empty ();
532
-
533
545
while ((n_remain != 0 && !is_antiprompt) || params.interactive ) {
534
546
// predict
535
547
if (!embd.empty ()) {
@@ -742,18 +754,39 @@ int main(int argc, char ** argv) {
742
754
// display text
743
755
if (input_echo && display) {
744
756
for (auto id : embd) {
745
- const std::string token_str = llama_token_to_piece (ctx, id, !params.conversation && should_show_special_tokens);
746
- printf (" %s" , token_str.c_str ());
747
-
757
+ const std::string token_str = llama_token_to_piece (ctx, id);
758
+
759
+ // Console/Stream Output
760
+ if (!llama_token_is_control_token (llama_get_model (ctx), id)) {
761
+ // Stream Output Token To Standard Output
762
+ fprintf (stdout, " %s" , token_str.c_str ());
763
+ } else if (!params.no_special ) {
764
+ #ifndef _MSC_VER
765
+ if (control_token_file_descriptor_is_attached) {
766
+ // Stream Control Token To Special Token Output. Useful for debugging control token behaviour
767
+ (void )! write (SPECIAL_FILENO, token_str.c_str (), token_str.length ());
768
+ } else
769
+ #endif
770
+ if (control_token_allowed_on_standard_stream)
771
+ {
772
+ // Stream Control Token To Standard Output Stream
773
+ fprintf (stdout, " %s" , token_str.c_str ());
774
+ }
775
+ }
776
+ // Record Displayed Tokens To Log
777
+ // Note: generated tokens arrive one at a time, so a batch holding more than one token is recorded as input rather than output
748
778
if (embd.size () > 1 ) {
779
+ // Incoming Requested Tokens
749
780
input_tokens.push_back (id);
750
781
} else {
782
+ // Outgoing Generated Tokens
751
783
output_tokens.push_back (id);
752
784
output_ss << token_str;
753
785
}
786
+ fflush (stdout);
754
787
}
755
- fflush (stdout);
756
788
}
789
+
757
790
// reset color to default if there is no pending user input
758
791
if (input_echo && (int ) embd_inp.size () == n_consumed) {
759
792
console::set_display (console::reset);
@@ -908,7 +941,7 @@ int main(int argc, char ** argv) {
908
941
for (size_t i = original_size; i < embd_inp.size (); ++i) {
909
942
const llama_token token = embd_inp[i];
910
943
output_tokens.push_back (token);
911
- output_ss << llama_token_to_piece (ctx, token, should_show_special_tokens );
944
+ output_ss << llama_token_to_piece (ctx, token);
912
945
}
913
946
914
947
n_remain -= line_inp.size ();
0 commit comments