@@ -107,7 +107,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
107
107
std::string arg;
108
108
gpt_params default_params;
109
109
const std::string arg_prefix = " --" ;
110
- llama_sampling_params & sparams = params.sampling_params ;
110
+ llama_sampling_params & sparams = params.sparams ;
111
111
112
112
for (int i = 1 ; i < argc; i++) {
113
113
arg = argv[i];
@@ -241,25 +241,26 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
241
241
invalid_param = true ;
242
242
break ;
243
243
}
244
- sparams.repeat_last_n = std::stoi (argv[i]);
244
+ sparams.penalty_last_n = std::stoi (argv[i]);
245
+ sparams.n_prev = std::max (sparams.n_prev , sparams.penalty_last_n );
245
246
} else if (arg == " --repeat-penalty" ) {
246
247
if (++i >= argc) {
247
248
invalid_param = true ;
248
249
break ;
249
250
}
250
- sparams.repeat_penalty = std::stof (argv[i]);
251
+ sparams.penalty_repeat = std::stof (argv[i]);
251
252
} else if (arg == " --frequency-penalty" ) {
252
253
if (++i >= argc) {
253
254
invalid_param = true ;
254
255
break ;
255
256
}
256
- sparams.frequency_penalty = std::stof (argv[i]);
257
+ sparams.penalty_freq = std::stof (argv[i]);
257
258
} else if (arg == " --presence-penalty" ) {
258
259
if (++i >= argc) {
259
260
invalid_param = true ;
260
261
break ;
261
262
}
262
- sparams.presence_penalty = std::stof (argv[i]);
263
+ sparams.penalty_present = std::stof (argv[i]);
263
264
} else if (arg == " --mirostat" ) {
264
265
if (++i >= argc) {
265
266
invalid_param = true ;
@@ -572,7 +573,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
572
573
invalid_param = true ;
573
574
break ;
574
575
}
575
- params .grammar = argv[i];
576
+ sparams .grammar = argv[i];
576
577
} else if (arg == " --grammar-file" ) {
577
578
if (++i >= argc) {
578
579
invalid_param = true ;
@@ -587,7 +588,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
587
588
std::copy (
588
589
std::istreambuf_iterator<char >(file),
589
590
std::istreambuf_iterator<char >(),
590
- std::back_inserter (params .grammar )
591
+ std::back_inserter (sparams .grammar )
591
592
);
592
593
#ifndef LOG_DISABLE_LOGS
593
594
// Parse args for logging parameters
@@ -640,7 +641,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
640
641
}
641
642
642
643
void gpt_print_usage (int /* argc*/ , char ** argv, const gpt_params & params) {
643
- const llama_sampling_params & sparams = params.sampling_params ;
644
+ const llama_sampling_params & sparams = params.sparams ;
644
645
645
646
printf (" usage: %s [options]\n " , argv[0 ]);
646
647
printf (" \n " );
@@ -678,10 +679,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
678
679
printf (" --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n " , (double )sparams.top_p );
679
680
printf (" --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n " , (double )sparams.tfs_z );
680
681
printf (" --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n " , (double )sparams.typical_p );
681
- printf (" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n " , sparams.repeat_last_n );
682
- printf (" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n " , (double )sparams.repeat_penalty );
683
- printf (" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.presence_penalty );
684
- printf (" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.frequency_penalty );
682
+ printf (" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n " , sparams.penalty_last_n );
683
+ printf (" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n " , (double )sparams.penalty_repeat );
684
+ printf (" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.penalty_present );
685
+ printf (" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n " , (double )sparams.penalty_freq );
685
686
printf (" --mirostat N use Mirostat sampling.\n " );
686
687
printf (" Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n " );
687
688
printf (" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n " , sparams.mirostat );
@@ -878,7 +879,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
878
879
}
879
880
880
881
if (params.ignore_eos ) {
881
- params.sampling_params .logit_bias [llama_token_eos (lctx)] = -INFINITY;
882
+ params.sparams .logit_bias [llama_token_eos (lctx)] = -INFINITY;
882
883
}
883
884
884
885
{
@@ -1123,28 +1124,28 @@ std::string get_sortable_timestamp() {
1123
1124
1124
1125
void dump_non_result_info_yaml (FILE * stream, const gpt_params & params, const llama_context * lctx,
1125
1126
const std::string & timestamp, const std::vector<int > & prompt_tokens, const char * model_desc) {
1126
- const llama_sampling_params & sparams = params.sampling_params ;
1127
+ const llama_sampling_params & sparams = params.sparams ;
1127
1128
1128
1129
fprintf (stream, " build_commit: %s\n " , BUILD_COMMIT);
1129
1130
fprintf (stream, " build_number: %d\n " , BUILD_NUMBER);
1130
- fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
1131
- fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
1132
- fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
1133
- fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
1131
+ fprintf (stream, " cpu_has_arm_fma: %s\n " , ggml_cpu_has_arm_fma () ? " true" : " false" );
1132
+ fprintf (stream, " cpu_has_avx: %s\n " , ggml_cpu_has_avx () ? " true" : " false" );
1133
+ fprintf (stream, " cpu_has_avx2: %s\n " , ggml_cpu_has_avx2 () ? " true" : " false" );
1134
+ fprintf (stream, " cpu_has_avx512: %s\n " , ggml_cpu_has_avx512 () ? " true" : " false" );
1134
1135
fprintf (stream, " cpu_has_avx512_vbmi: %s\n " , ggml_cpu_has_avx512_vbmi () ? " true" : " false" );
1135
1136
fprintf (stream, " cpu_has_avx512_vnni: %s\n " , ggml_cpu_has_avx512_vnni () ? " true" : " false" );
1136
- fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1137
- fprintf (stream, " cpu_has_cublas: %s\n " , ggml_cpu_has_cublas () ? " true" : " false" );
1138
- fprintf (stream, " cpu_has_clblast: %s\n " , ggml_cpu_has_clblast () ? " true" : " false" );
1139
- fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
1140
- fprintf (stream, " cpu_has_gpublas: %s\n " , ggml_cpu_has_gpublas () ? " true" : " false" );
1141
- fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
1142
- fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
1143
- fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
1144
- fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
1145
- fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1146
- fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
1147
- fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
1137
+ fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1138
+ fprintf (stream, " cpu_has_cublas: %s\n " , ggml_cpu_has_cublas () ? " true" : " false" );
1139
+ fprintf (stream, " cpu_has_clblast: %s\n " , ggml_cpu_has_clblast () ? " true" : " false" );
1140
+ fprintf (stream, " cpu_has_fma: %s\n " , ggml_cpu_has_fma () ? " true" : " false" );
1141
+ fprintf (stream, " cpu_has_gpublas: %s\n " , ggml_cpu_has_gpublas () ? " true" : " false" );
1142
+ fprintf (stream, " cpu_has_neon: %s\n " , ggml_cpu_has_neon () ? " true" : " false" );
1143
+ fprintf (stream, " cpu_has_f16c: %s\n " , ggml_cpu_has_f16c () ? " true" : " false" );
1144
+ fprintf (stream, " cpu_has_fp16_va: %s\n " , ggml_cpu_has_fp16_va () ? " true" : " false" );
1145
+ fprintf (stream, " cpu_has_wasm_simd: %s\n " , ggml_cpu_has_wasm_simd () ? " true" : " false" );
1146
+ fprintf (stream, " cpu_has_blas: %s\n " , ggml_cpu_has_blas () ? " true" : " false" );
1147
+ fprintf (stream, " cpu_has_sse3: %s\n " , ggml_cpu_has_sse3 () ? " true" : " false" );
1148
+ fprintf (stream, " cpu_has_vsx: %s\n " , ggml_cpu_has_vsx () ? " true" : " false" );
1148
1149
1149
1150
#ifdef NDEBUG
1150
1151
fprintf (stream, " debug: false\n " );
@@ -1178,8 +1179,8 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
1178
1179
fprintf (stream, " ctx_size: %d # default: 512\n " , params.n_ctx );
1179
1180
fprintf (stream, " escape: %s # default: false\n " , params.escape ? " true" : " false" );
1180
1181
fprintf (stream, " file: # never logged, see prompt instead. Can still be specified for input.\n " );
1181
- fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , sparams.frequency_penalty );
1182
- dump_string_yaml_multiline (stream, " grammar" , params .grammar .c_str ());
1182
+ fprintf (stream, " frequency_penalty: %f # default: 0.0 \n " , sparams.penalty_freq );
1183
+ dump_string_yaml_multiline (stream, " grammar" , sparams .grammar .c_str ());
1183
1184
fprintf (stream, " grammar-file: # never logged, see grammar instead. Can still be specified for input.\n " );
1184
1185
fprintf (stream, " hellaswag: %s # default: false\n " , params.hellaswag ? " true" : " false" );
1185
1186
fprintf (stream, " hellaswag_tasks: %zu # default: 400\n " , params.hellaswag_tasks );
@@ -1238,14 +1239,14 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
1238
1239
fprintf (stream, " numa: %s # default: false\n " , params.numa ? " true" : " false" );
1239
1240
fprintf (stream, " ppl_output_type: %d # default: 0\n " , params.ppl_output_type );
1240
1241
fprintf (stream, " ppl_stride: %d # default: 0\n " , params.ppl_stride );
1241
- fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.presence_penalty );
1242
+ fprintf (stream, " presence_penalty: %f # default: 0.0\n " , sparams.penalty_present );
1242
1243
dump_string_yaml_multiline (stream, " prompt" , params.prompt .c_str ());
1243
1244
fprintf (stream, " prompt_cache: %s\n " , params.path_prompt_cache .c_str ());
1244
1245
fprintf (stream, " prompt_cache_all: %s # default: false\n " , params.prompt_cache_all ? " true" : " false" );
1245
1246
fprintf (stream, " prompt_cache_ro: %s # default: false\n " , params.prompt_cache_ro ? " true" : " false" );
1246
1247
dump_vector_int_yaml (stream, " prompt_tokens" , prompt_tokens);
1247
1248
fprintf (stream, " random_prompt: %s # default: false\n " , params.random_prompt ? " true" : " false" );
1248
- fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.repeat_penalty );
1249
+ fprintf (stream, " repeat_penalty: %f # default: 1.1\n " , sparams.penalty_repeat );
1249
1250
1250
1251
fprintf (stream, " reverse_prompt:\n " );
1251
1252
for (std::string ap : params.antiprompt ) {
0 commit comments