Skip to content

Commit 985f30f

Browse files
committed
feat: only send array to stdout, everything else to stderr
1 parent 0d84dfa commit 985f30f

File tree

2 files changed

+30
-30
lines changed

2 files changed

+30
-30
lines changed

bert.cpp

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ static std::string get_ftype(int ftype) {
8383
static void tensor_stats(ggml_tensor * t) {
8484
int32_t src0 = t->src[0] ? t->src[0]->backend : -1;
8585
int32_t src1 = t->src[1] ? t->src[1]->backend : -1;
86-
printf(
86+
fprintf(stderr,
8787
"type = %s, dims = %d, shape = (%ld, %ld, %ld, %ld), backend = %d, src0 = %d, src1 = %d\n",
8888
ggml_type_name(t->type), ggml_n_dims(t), t->ne[0], t->ne[1], t->ne[2], t->ne[3], t->backend, src0, src1
8989
);
@@ -362,14 +362,14 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
362362
const std::string ftype_str = get_ftype(ftype);
363363
const std::string description = get_str(ctx_gguf, KEY_DESCRIPTION);
364364
const std::string name = get_str(ctx_gguf, KEY_NAME);
365-
printf("%s: model name: %s\n", __func__, name.c_str());
366-
printf("%s: description: %s\n", __func__, description.c_str());
367-
printf("%s: GGUF version: %d\n", __func__, version);
368-
printf("%s: alignment: %d\n", __func__, alignment);
369-
printf("%s: n_tensors: %d\n", __func__, n_tensors);
370-
printf("%s: n_kv: %d\n", __func__, n_kv);
371-
printf("%s: ftype: %s\n", __func__, ftype_str.c_str());
372-
printf("\n");
365+
fprintf(stderr, "%s: model name: %s\n", __func__, name.c_str());
366+
fprintf(stderr, "%s: description: %s\n", __func__, description.c_str());
367+
fprintf(stderr, "%s: GGUF version: %d\n", __func__, version);
368+
fprintf(stderr, "%s: alignment: %d\n", __func__, alignment);
369+
fprintf(stderr, "%s: n_tensors: %d\n", __func__, n_tensors);
370+
fprintf(stderr, "%s: n_kv: %d\n", __func__, n_kv);
371+
fprintf(stderr, "%s: ftype: %s\n", __func__, ftype_str.c_str());
372+
fprintf(stderr, "\n");
373373
}
374374
const int n_tensors = gguf_get_n_tensors(ctx_gguf);
375375

@@ -390,14 +390,14 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
390390
hparams.layer_norm_eps = get_f32(ctx_gguf, "layer_norm_eps");
391391

392392
if (verbosity >= 1) {
393-
printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
394-
printf("%s: n_max_tokens = %d\n", __func__, hparams.n_max_tokens);
395-
printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
396-
printf("%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
397-
printf("%s: n_head = %d\n", __func__, hparams.n_head);
398-
printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
399-
printf("%s: layer_norm_eps = %g\n", __func__, hparams.layer_norm_eps);
400-
printf("\n");
393+
fprintf(stderr, "%s: n_vocab = %d\n", __func__, hparams.n_vocab);
394+
fprintf(stderr, "%s: n_max_tokens = %d\n", __func__, hparams.n_max_tokens);
395+
fprintf(stderr, "%s: n_embd = %d\n", __func__, hparams.n_embd);
396+
fprintf(stderr, "%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
397+
fprintf(stderr, "%s: n_head = %d\n", __func__, hparams.n_head);
398+
fprintf(stderr, "%s: n_layer = %d\n", __func__, hparams.n_layer);
399+
fprintf(stderr, "%s: layer_norm_eps = %g\n", __func__, hparams.layer_norm_eps);
400+
fprintf(stderr, "\n");
401401
}
402402
}
403403

@@ -432,7 +432,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
432432
size_t tensor_size = ggml_nbytes(cur);
433433
buffer_size += tensor_size;
434434
if (verbosity >= 2) {
435-
printf("%s: tensor[%d]: type = %s, n_dims = %d, name = %s, offset=%zu, type=%d\n", __func__, i,
435+
fprintf(stderr, "%s: tensor[%d]: type = %s, n_dims = %d, name = %s, offset=%zu, type=%d\n", __func__, i,
436436
ggml_type_name(cur->type), ggml_n_dims(cur), cur->name, offset, cur->type);
437437
}
438438
}
@@ -445,7 +445,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
445445
if (!new_bert->backend) {
446446
fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
447447
} else {
448-
printf("%s: using CUDA backend\n", __func__);
448+
fprintf(stderr, "%s: using CUDA backend\n", __func__);
449449
}
450450
}
451451
#endif
@@ -460,7 +460,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
460460
// fall back to CPU backend
461461
if (!new_bert->backend) {
462462
new_bert->backend = ggml_backend_cpu_init();
463-
printf("%s: using CPU backend\n", __func__);
463+
fprintf(stderr, "%s: using CPU backend\n", __func__);
464464
}
465465

466466
// load tensors
@@ -486,7 +486,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
486486
// open model gguf file
487487
auto fin = std::ifstream(fname, std::ios::binary);
488488
if (!fin) {
489-
printf("cannot open model file for loading tensors\n");
489+
fprintf(stderr, "cannot open model file for loading tensors\n");
490490
delete new_bert;
491491
return nullptr;
492492
}
@@ -514,7 +514,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
514514
const size_t offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i);
515515
fin.seekg(offset, std::ios::beg);
516516
if (!fin) {
517-
printf("%s: failed to seek for tensor %s\n", __func__, name);
517+
fprintf(stderr, "%s: failed to seek for tensor %s\n", __func__, name);
518518
bert_free(new_bert);
519519
return nullptr;
520520
}
@@ -612,7 +612,7 @@ void bert_allocate_buffers(bert_ctx * ctx, int32_t n_max_tokens, int32_t batch_s
612612
ctx->compute_alloc = ggml_allocr_new_from_buffer(ctx->compute_buffer);
613613

614614
if (verbosity >= 1) {
615-
printf("%s: compute allocated memory: %.2f MB\n\n", __func__, compute_memory_buffer_size / 1024.0 / 1024.0);
615+
fprintf(stderr, "%s: compute allocated memory: %.2f MB\n\n", __func__, compute_memory_buffer_size / 1024.0 / 1024.0);
616616
}
617617
}
618618

examples/main.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,9 @@ int main(int argc, char ** argv) {
9797

9898
// print the tokens
9999
for (auto & tok : tokens) {
100-
printf("%d -> %s\n", tok, bert_vocab_id_to_token(bctx, tok));
100+
fprintf(stderr, "%d -> %s\n", tok, bert_vocab_id_to_token(bctx, tok));
101101
}
102-
printf("\n");
102+
fprintf(stderr, "\n");
103103

104104
// create a batch
105105
const int n_embd = bert_n_embd(bctx);
@@ -123,11 +123,11 @@ int main(int argc, char ** argv) {
123123
{
124124
const int64_t t_main_end_us = ggml_time_us();
125125

126-
printf("\n");
127-
printf("%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f);
128-
printf("%s: token time = %8.2f ms / %.2f ms per token\n", __func__, t_token_us/1000.0f, t_token_us/1000.0f/tokens.size());
129-
printf("%s: eval time = %8.2f ms / %.2f ms per token\n", __func__, t_eval_us/1000.0f, t_eval_us/1000.0f/tokens.size());
130-
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
126+
fprintf(stderr, "\n");
127+
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, t_load_us/1000.0f);
128+
fprintf(stderr, "%s: token time = %8.2f ms / %.2f ms per token\n", __func__, t_token_us/1000.0f, t_token_us/1000.0f/tokens.size());
129+
fprintf(stderr, "%s: eval time = %8.2f ms / %.2f ms per token\n", __func__, t_eval_us/1000.0f, t_eval_us/1000.0f/tokens.size());
130+
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
131131
}
132132

133133
return 0;

0 commit comments

Comments
 (0)