77
77
78
78
using json = nlohmann::ordered_json;
79
79
80
+ //
81
+ // Environment variable utils
82
+ //
83
+
84
// Reads the environment variable `name` into a std::string target.
//
//   name   - name of the environment variable to look up
//   target - overwritten with the variable's value when it is set (an empty
//            value yields an empty string); left untouched when it is unset
template <typename T>
static typename std::enable_if<std::is_same<T, std::string>::value, void>::type
get_env(const std::string & name, T & target) {
    const char * value = std::getenv(name.c_str());
    // only assign when the variable exists, so the caller's default is kept
    // without copying it onto itself
    if (value != nullptr) {
        target = value;
    }
}
90
+
91
// Reads the environment variable `name` into an integral (non-bool) target.
//
//   name   - name of the environment variable to look up
//   target - overwritten with the parsed value when the variable is set;
//            left untouched when it is unset
//
// The parser is picked from the width of T so that 64-bit targets are not
// limited to the range of `int`:
//   - T no wider than int : std::stoi
//   - wider, signed       : std::stoll
//   - wider, unsigned     : std::stoull
// The std::sto* functions throw std::invalid_argument when the text is not a
// number and std::out_of_range when it does not fit the parsed type.
template <typename T>
static typename std::enable_if<!std::is_same<T, bool>::value && std::is_integral<T>::value, void>::type
get_env(const std::string & name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value == nullptr) {
        return;
    }
    if (sizeof(T) <= sizeof(int)) {
        target = static_cast<T>(std::stoi(value));
    } else if (std::is_signed<T>::value) {
        target = static_cast<T>(std::stoll(value));
    } else {
        target = static_cast<T>(std::stoull(value));
    }
}
97
+
98
// Reads the environment variable `name` into a floating-point target.
//
//   name   - name of the environment variable to look up
//   target - overwritten with the parsed value when the variable is set;
//            left untouched when it is unset
//
// The parser matches the target type (float -> std::stof, double -> std::stod,
// anything else -> std::stold) so that double / long double targets are not
// rounded through float. The std::sto* functions throw std::invalid_argument
// when the text is not a number and std::out_of_range when it does not fit.
template <typename T>
static typename std::enable_if<std::is_floating_point<T>::value, void>::type
get_env(const std::string & name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value == nullptr) {
        return;
    }
    if (std::is_same<T, float>::value) {
        target = static_cast<T>(std::stof(value));
    } else if (std::is_same<T, double>::value) {
        target = static_cast<T>(std::stod(value));
    } else {
        target = static_cast<T>(std::stold(value));
    }
}
104
+
105
// Reads the environment variable `name` into a bool target.
//
//   name   - name of the environment variable to look up
//   target - when the variable is set: true if its value is exactly "1" or
//            "true", false for any other value; left untouched when unset
template <typename T>
static typename std::enable_if<std::is_same<T, bool>::value, void>::type
get_env(const std::string & name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value == nullptr) {
        return;
    }
    // the accepted spellings must not carry a leading space, otherwise the
    // usual values `1` / `true` can never match
    const std::string val(value);
    target = (val == "1" || val == "true");
}
114
+
80
115
//
81
116
// CPU utils
82
117
//
@@ -220,12 +255,6 @@ int32_t cpu_get_num_math() {
220
255
// CLI argument parsing
221
256
//
222
257
223
- void gpt_params_handle_hf_token (gpt_params & params) {
224
- if (params.hf_token .empty () && std::getenv (" HF_TOKEN" )) {
225
- params.hf_token = std::getenv (" HF_TOKEN" );
226
- }
227
- }
228
-
229
258
void gpt_params_handle_model_default (gpt_params & params) {
230
259
if (!params.hf_repo .empty ()) {
231
260
// short-hand to avoid specifying --hf-file -> default it to --model
@@ -273,7 +302,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
273
302
274
303
gpt_params_handle_model_default (params);
275
304
276
- gpt_params_handle_hf_token (params);
305
+ if (params.hf_token .empty ()) {
306
+ get_env (" HF_TOKEN" , params.hf_token );
307
+ }
277
308
278
309
if (params.escape ) {
279
310
string_process_escapes (params.prompt );
@@ -293,6 +324,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
293
324
return true ;
294
325
}
295
326
327
+ void gpt_params_parse_from_env (gpt_params & params) {
328
+ // we only care about server-related params for now
329
+ get_env (" LLAMA_ARG_MODEL" , params.model );
330
+ get_env (" LLAMA_ARG_THREADS" , params.n_threads );
331
+ get_env (" LLAMA_ARG_CTX_SIZE" , params.n_ctx );
332
+ get_env (" LLAMA_ARG_N_PARALLEL" , params.n_parallel );
333
+ get_env (" LLAMA_ARG_BATCH" , params.n_batch );
334
+ get_env (" LLAMA_ARG_UBATCH" , params.n_ubatch );
335
+ get_env (" LLAMA_ARG_N_GPU_LAYERS" , params.n_gpu_layers );
336
+ get_env (" LLAMA_ARG_THREADS_HTTP" , params.n_threads_http );
337
+ get_env (" LLAMA_ARG_CHAT_TEMPLATE" , params.chat_template );
338
+ get_env (" LLAMA_ARG_N_PREDICT" , params.n_predict );
339
+ get_env (" LLAMA_ARG_ENDPOINT_METRICS" , params.endpoint_metrics );
340
+ get_env (" LLAMA_ARG_ENDPOINT_SLOTS" , params.endpoint_slots );
341
+ get_env (" LLAMA_ARG_EMBEDDINGS" , params.embedding );
342
+ get_env (" LLAMA_ARG_FLASH_ATTN" , params.flash_attn );
343
+ get_env (" LLAMA_ARG_DEFRAG_THOLD" , params.defrag_thold );
344
+ }
345
+
296
346
bool gpt_params_parse (int argc, char ** argv, gpt_params & params) {
297
347
const auto params_org = params; // the example can modify the default params
298
348
0 commit comments