Skip to content

Commit 9fdb124

Browse files
authored
common : add missing env var for speculative (#10801)
1 parent 5555c0c commit 9fdb124

File tree

1 file changed: 7 additions (+7), 7 deletions (-7)

common/arg.cpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2083,35 +2083,35 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
20832083
[](common_params & params, int value) {
20842084
params.speculative.n_max = value;
20852085
}
2086-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}));
2086+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MAX"));
20872087
add_opt(common_arg(
20882088
{"--draft-min", "--draft-n-min"}, "N",
20892089
string_format("minimum number of draft tokens to use for speculative decoding (default: %d)", params.speculative.n_min),
20902090
[](common_params & params, int value) {
20912091
params.speculative.n_min = value;
20922092
}
2093-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}));
2093+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MIN"));
20942094
add_opt(common_arg(
20952095
{"--draft-p-split"}, "P",
20962096
string_format("speculative decoding split probability (default: %.1f)", (double)params.speculative.p_split),
20972097
[](common_params & params, const std::string & value) {
20982098
params.speculative.p_split = std::stof(value);
20992099
}
2100-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
2100+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE}).set_env("LLAMA_ARG_DRAFT_P_SPLIT"));
21012101
add_opt(common_arg(
21022102
{"--draft-p-min"}, "P",
21032103
string_format("minimum speculative decoding probability (greedy) (default: %.1f)", (double)params.speculative.p_min),
21042104
[](common_params & params, const std::string & value) {
21052105
params.speculative.p_min = std::stof(value);
21062106
}
2107-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2107+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_P_MIN"));
21082108
add_opt(common_arg(
21092109
{"-cd", "--ctx-size-draft"}, "N",
21102110
string_format("size of the prompt context for the draft model (default: %d, 0 = loaded from model)", params.speculative.n_ctx),
21112111
[](common_params & params, int value) {
21122112
params.speculative.n_ctx = value;
21132113
}
2114-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2114+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CTX_SIZE_DRAFT"));
21152115
add_opt(common_arg(
21162116
{"-devd", "--device-draft"}, "<dev1,dev2,..>",
21172117
"comma-separated list of devices to use for offloading the draft model (none = don't offload)\n"
@@ -2131,14 +2131,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
21312131
fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n");
21322132
}
21332133
}
2134-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2134+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_N_GPU_LAYERS_DRAFT"));
21352135
add_opt(common_arg(
21362136
{"-md", "--model-draft"}, "FNAME",
21372137
"draft model for speculative decoding (default: unused)",
21382138
[](common_params & params, const std::string & value) {
21392139
params.speculative.model = value;
21402140
}
2141-
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2141+
).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
21422142

21432143
return ctx_arg;
21442144
}

0 commit comments

Comments
 (0)