@@ -83,7 +83,7 @@ static std::string get_ftype(int ftype) {
 static void tensor_stats(ggml_tensor * t) {
     int32_t src0 = t->src[0] ? t->src[0]->backend : -1;
     int32_t src1 = t->src[1] ? t->src[1]->backend : -1;
-    printf(
+    fprintf(stderr,
         "type = %s, dims = %d, shape = (%ld, %ld, %ld, %ld), backend = %d, src0 = %d, src1 = %d\n",
         ggml_type_name(t->type), ggml_n_dims(t), t->ne[0], t->ne[1], t->ne[2], t->ne[3], t->backend, src0, src1
     );
@@ -362,14 +362,14 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
         const std::string ftype_str = get_ftype(ftype);
         const std::string description = get_str(ctx_gguf, KEY_DESCRIPTION);
         const std::string name = get_str(ctx_gguf, KEY_NAME);
-        printf("%s: model name: %s\n", __func__, name.c_str());
-        printf("%s: description: %s\n", __func__, description.c_str());
-        printf("%s: GGUF version: %d\n", __func__, version);
-        printf("%s: alignment: %d\n", __func__, alignment);
-        printf("%s: n_tensors: %d\n", __func__, n_tensors);
-        printf("%s: n_kv: %d\n", __func__, n_kv);
-        printf("%s: ftype: %s\n", __func__, ftype_str.c_str());
-        printf("\n");
+        fprintf(stderr, "%s: model name: %s\n", __func__, name.c_str());
+        fprintf(stderr, "%s: description: %s\n", __func__, description.c_str());
+        fprintf(stderr, "%s: GGUF version: %d\n", __func__, version);
+        fprintf(stderr, "%s: alignment: %d\n", __func__, alignment);
+        fprintf(stderr, "%s: n_tensors: %d\n", __func__, n_tensors);
+        fprintf(stderr, "%s: n_kv: %d\n", __func__, n_kv);
+        fprintf(stderr, "%s: ftype: %s\n", __func__, ftype_str.c_str());
+        fprintf(stderr, "\n");
     }

     const int n_tensors = gguf_get_n_tensors(ctx_gguf);
@@ -390,14 +390,14 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
         hparams.layer_norm_eps = get_f32(ctx_gguf, "layer_norm_eps");

         if (verbosity >= 1) {
-            printf("%s: n_vocab = %d\n", __func__, hparams.n_vocab);
-            printf("%s: n_max_tokens = %d\n", __func__, hparams.n_max_tokens);
-            printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
-            printf("%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
-            printf("%s: n_head = %d\n", __func__, hparams.n_head);
-            printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
-            printf("%s: layer_norm_eps = %g\n", __func__, hparams.layer_norm_eps);
-            printf("\n");
+            fprintf(stderr, "%s: n_vocab = %d\n", __func__, hparams.n_vocab);
+            fprintf(stderr, "%s: n_max_tokens = %d\n", __func__, hparams.n_max_tokens);
+            fprintf(stderr, "%s: n_embd = %d\n", __func__, hparams.n_embd);
+            fprintf(stderr, "%s: n_intermediate = %d\n", __func__, hparams.n_intermediate);
+            fprintf(stderr, "%s: n_head = %d\n", __func__, hparams.n_head);
+            fprintf(stderr, "%s: n_layer = %d\n", __func__, hparams.n_layer);
+            fprintf(stderr, "%s: layer_norm_eps = %g\n", __func__, hparams.layer_norm_eps);
+            fprintf(stderr, "\n");
         }
     }

@@ -432,7 +432,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
         size_t tensor_size = ggml_nbytes(cur);
         buffer_size += tensor_size;
         if (verbosity >= 2) {
-            printf("%s: tensor[%d]: type = %s, n_dims = %d, name = %s, offset=%zu, type=%d\n", __func__, i,
+            fprintf(stderr, "%s: tensor[%d]: type = %s, n_dims = %d, name = %s, offset=%zu, type=%d\n", __func__, i,
                 ggml_type_name(cur->type), ggml_n_dims(cur), cur->name, offset, cur->type);
         }
     }
@@ -445,7 +445,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
         if (!new_bert->backend) {
             fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
         } else {
-            printf("%s: using CUDA backend\n", __func__);
+            fprintf(stderr, "%s: using CUDA backend\n", __func__);
         }
     }
 #endif
@@ -460,7 +460,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
     // fall back to CPU backend
     if (!new_bert->backend) {
         new_bert->backend = ggml_backend_cpu_init();
-        printf("%s: using CPU backend\n", __func__);
+        fprintf(stderr, "%s: using CPU backend\n", __func__);
     }

     // load tensors
@@ -486,7 +486,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
     // open model gguf file
     auto fin = std::ifstream(fname, std::ios::binary);
     if (!fin) {
-        printf("cannot open model file for loading tensors\n");
+        fprintf(stderr, "cannot open model file for loading tensors\n");
         delete new_bert;
         return nullptr;
     }
@@ -514,7 +514,7 @@ struct bert_ctx * bert_load_from_file(const char *fname, bool use_cpu) {
         const size_t offset = gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, i);
         fin.seekg(offset, std::ios::beg);
         if (!fin) {
-            printf("%s: failed to seek for tensor %s\n", __func__, name);
+            fprintf(stderr, "%s: failed to seek for tensor %s\n", __func__, name);
             bert_free(new_bert);
             return nullptr;
         }
@@ -612,7 +612,7 @@ void bert_allocate_buffers(bert_ctx * ctx, int32_t n_max_tokens, int32_t batch_s
     ctx->compute_alloc = ggml_allocr_new_from_buffer(ctx->compute_buffer);

     if (verbosity >= 1) {
-        printf("%s: compute allocated memory: %.2f MB\n\n", __func__, compute_memory_buffer_size / 1024.0 / 1024.0);
+        fprintf(stderr, "%s: compute allocated memory: %.2f MB\n\n", __func__, compute_memory_buffer_size / 1024.0 / 1024.0);
     }
 }
