@@ -264,6 +264,10 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
         params.kv_overrides.back().key[0] = 0;
     }
 
+    if (params.sparams.seed == LLAMA_DEFAULT_SEED) {
+        params.sparams.seed = time(NULL);
+    }
+
     return true;
 }
@@ -294,8 +298,6 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     if (arg == "-s" || arg == "--seed") {
         CHECK_ARG
-        // TODO: this is temporary, in the future the sampling state will be moved fully to llama_sampling_context.
-        params.seed = std::stoul(argv[i]);
         sparams.seed = std::stoul(argv[i]);
         return true;
     }
@@ -1404,7 +1406,6 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "*", "--verbose-prompt", "print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false" });
     options.push_back({ "*", "--no-display-prompt", "don't print prompt at generation (default: %s)", !params.display_prompt ? "true" : "false" });
     options.push_back({ "*", "-co, --color", "colorise output to distinguish prompt and user input from generations (default: %s)", params.use_color ? "true" : "false" });
-    options.push_back({ "*", "-s, --seed SEED", "RNG seed (default: %d, use random seed for < 0)", params.seed });
     options.push_back({ "*", "-t, --threads N", "number of threads to use during generation (default: %d)", params.n_threads });
     options.push_back({ "*", "-tb, --threads-batch N", "number of threads to use during batch and prompt processing (default: same as --threads)" });
     options.push_back({ "speculative", "-td, --threads-draft N", "number of threads to use during generation (default: same as --threads)" });
@@ -1455,6 +1456,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
                         "--spm-infill", "use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: %s)", params.spm_infill ? "enabled" : "disabled" });
 
     options.push_back({ "sampling" });
+    options.push_back({ "*", "-s, --seed SEED", "RNG seed (default: %d, use random seed for < 0)", sparams.seed });
     options.push_back({ "*", "--samplers SAMPLERS", "samplers that will be used for generation in the order, separated by \';\'\n"
                         "(default: %s)", sampler_type_names.c_str() });
     options.push_back({ "*", "--sampling-seq SEQUENCE",
@@ -2199,7 +2201,6 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
     cparams.n_ubatch          = params.n_ubatch;
     cparams.n_threads         = params.n_threads;
     cparams.n_threads_batch   = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
-    cparams.seed              = params.seed;
     cparams.logits_all        = params.logits_all;
     cparams.embeddings        = params.embedding;
     cparams.rope_scaling_type = params.rope_scaling_type;
@@ -3210,7 +3211,6 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
     fprintf(stream, "rope_freq_base: %f # default: 10000.0\n", params.rope_freq_base);
     fprintf(stream, "rope_freq_scale: %f # default: 1.0\n", params.rope_freq_scale);
-    fprintf(stream, "seed: %u # default: -1 (random seed)\n", params.seed);
     fprintf(stream, "simple_io: %s # default: false\n", params.simple_io ? "true" : "false");
     fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? "true" : "false");
     fprintf(stream, "flash_attn: %s # default: false\n", params.flash_attn ? "true" : "false");