-
Notifications
You must be signed in to change notification settings - Fork 1
main: use separate stream for control characters #4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bcd24f8
9f445a7
ad4b609
c9ea9df
5032f18
90456a5
50048f5
c1e8a6d
7d52482
8f76ba5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,8 @@ | |
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) | ||
#include <signal.h> | ||
#include <unistd.h> | ||
#include <fcntl.h> | ||
#define SPECIAL_FILENO 3 | ||
#elif defined (_WIN32) | ||
#define WIN32_LEAN_AND_MEAN | ||
#ifndef NOMINMAX | ||
|
@@ -118,6 +120,16 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v | |
} | ||
|
||
int main(int argc, char ** argv) { | ||
#ifndef _MSC_VER | ||
// Check if we have an external attachment to a file descriptor for out of band control tokens (e.g. bash `3>/dev/null` ) | ||
// Placed here to avoid file descriptor being polluted by gpt_params_parse() opening files | ||
const bool control_token_file_descriptor_is_attached = fcntl(SPECIAL_FILENO, F_GETFL) != -1; | ||
if (!control_token_file_descriptor_is_attached) { | ||
// Duplicate stdout file descriptor to control token file descriptor to merge the two streams | ||
dup2(STDOUT_FILENO, SPECIAL_FILENO); | ||
} | ||
#endif | ||
|
||
gpt_params params; | ||
g_params = &params; | ||
|
||
|
@@ -126,6 +138,8 @@ int main(int argc, char ** argv) { | |
} | ||
llama_sampling_params & sparams = params.sparams; | ||
|
||
const bool control_token_allowed_on_standard_stream = !params.conversation && sparams.grammar.empty(); | ||
|
||
#ifndef LOG_DISABLE_LOGS | ||
log_set_target(log_filename_generator("main", "log")); | ||
LOG_TEE("Log start\n"); | ||
|
@@ -528,8 +542,6 @@ int main(int argc, char ** argv) { | |
exit(1); | ||
} | ||
|
||
bool should_show_special_tokens = sparams.grammar.empty(); | ||
|
||
while ((n_remain != 0 && !is_antiprompt) || params.interactive) { | ||
// predict | ||
if (!embd.empty()) { | ||
|
@@ -742,18 +754,39 @@ int main(int argc, char ** argv) { | |
// display text | ||
if (input_echo && display) { | ||
for (auto id : embd) { | ||
const std::string token_str = llama_token_to_piece(ctx, id, !params.conversation && should_show_special_tokens); | ||
printf("%s", token_str.c_str()); | ||
|
||
const std::string token_str = llama_token_to_piece(ctx, id); | ||
|
||
// Console/Stream Output | ||
if (!llama_token_is_control_token(llama_get_model(ctx), id)) { | ||
// Stream Output Token To Standard Output | ||
fprintf(stdout, "%s", token_str.c_str()); | ||
} else if (!params.no_special) { | ||
#ifndef _MSC_VER | ||
if (control_token_file_descriptor_is_attached) { | ||
// Stream Control Token To Special Token Output. Useful for debugging control token behaviour | ||
(void)! write(SPECIAL_FILENO, token_str.c_str(), token_str.length()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. extraneous There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also I'm not sure why you'd have There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. intentional or it breaks
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep, apparently gcc and glibc have collaborated to do an extremely dumb thing. fine, keep it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Context: https://bugs.llvm.org/show_bug.cgi?id=51228 A possible approach according to @mrdomino #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
write(...);
#pragma GCC diagnostic pop |
||
} else | ||
#endif | ||
if (control_token_allowed_on_standard_stream) | ||
{ | ||
// Stream Control Token To Standard Output Stream | ||
fprintf(stdout, "%s", token_str.c_str()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. stdout is buffered, so if you are going to be using it, you will need to make sure You could also switch to calling write on fd 1 and make the output fully unbuffered, but I think this would be suboptimal. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added fflush(stdout) before every fprintf() now There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why wouldn't you flush after each print? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jart my theory was that you'd want to take advantage of the buffering. If you don't care about buffering output then absolutely, or even ETA: the other thing is that if you have anything else doing anything to stdout it can mess with the interleaving... it just seemed simpler to me to constrain it to where you were doing the token write |
||
} | ||
} | ||
// Record Displayed Tokens To Log | ||
// Note: Generated tokens are created one by one hence this check | ||
if (embd.size() > 1) { | ||
// Incoming Requested Tokens | ||
input_tokens.push_back(id); | ||
} else { | ||
// Outgoing Generated Tokens | ||
output_tokens.push_back(id); | ||
output_ss << token_str; | ||
} | ||
mofosyne marked this conversation as resolved.
Show resolved
Hide resolved
|
||
fflush(stdout); | ||
} | ||
fflush(stdout); | ||
} | ||
|
||
mofosyne marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// reset color to default if there is no pending user input | ||
if (input_echo && (int) embd_inp.size() == n_consumed) { | ||
console::set_display(console::reset); | ||
|
@@ -908,7 +941,7 @@ int main(int argc, char ** argv) { | |
for (size_t i = original_size; i < embd_inp.size(); ++i) { | ||
const llama_token token = embd_inp[i]; | ||
output_tokens.push_back(token); | ||
output_ss << llama_token_to_piece(ctx, token, should_show_special_tokens); | ||
output_ss << llama_token_to_piece(ctx, token); | ||
mofosyne marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
n_remain -= line_inp.size(); | ||
|
Uh oh!
There was an error while loading. Please reload this page.