Skip to content

Commit d58644f

Browse files
ngxsonarthw
authored and committed
server : support reading arguments from environment variables (ggml-org#9105)
* server : support reading arguments from environment variables * add -fa and -dt * readme : specify non-arg env var
1 parent 8e94e91 commit d58644f

File tree

4 files changed

+80
-8
lines changed

4 files changed

+80
-8
lines changed

common/common.cpp

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,41 @@
7777

7878
using json = nlohmann::ordered_json;
7979

//
// Environment variable utils
//

// Read environment variable `name` into `target` as a string.
// `target` is left unchanged when the variable is not set.
// Takes `name` by const reference to avoid copying the lookup key.
template<typename T>
static typename std::enable_if<std::is_same<T, std::string>::value, void>::type
get_env(const std::string & name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value) { // only overwrite when the variable exists (avoids self-assignment)
        target = value;
    }
}
90+
91+
// Read environment variable `name` into `target` as an integer
// (any integral type except bool). `target` is left unchanged when unset.
// NOTE(review): std::stoi throws std::invalid_argument on non-numeric text
// and std::out_of_range for values outside int range; the result is then
// converted to T — targets wider than int cannot receive values beyond
// int's range through this path.
template<typename T>
static typename std::enable_if<!std::is_same<T, bool>::value && std::is_integral<T>::value, void>::type
get_env(const std::string & name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value) { // only overwrite when the variable exists (avoids self-assignment)
        target = std::stoi(value);
    }
}
97+
98+
// Read environment variable `name` into `target` as a floating-point value.
// `target` is left unchanged when the variable is not set.
// NOTE(review): std::stof parses at float precision and throws on invalid
// input; double/long double targets are limited to float precision here.
template<typename T>
static typename std::enable_if<std::is_floating_point<T>::value, void>::type
get_env(const std::string & name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value) { // only overwrite when the variable exists (avoids self-assignment)
        target = std::stof(value);
    }
}
104+
105+
// Read environment variable `name` into `target` as a boolean.
// `target` is left unchanged when the variable is not set.
// Only the exact strings "1" and "true" are treated as true; any other
// value (including "TRUE", "yes", "on") sets the target to false.
template<typename T>
static typename std::enable_if<std::is_same<T, bool>::value, void>::type
get_env(const std::string & name, T & target) {
    const char * value = std::getenv(name.c_str());
    if (value) {
        std::string val(value);
        target = val == "1" || val == "true";
    }
}
114+
80115
//
81116
// CPU utils
82117
//
@@ -220,12 +255,6 @@ int32_t cpu_get_num_math() {
220255
// CLI argument parsing
221256
//
222257

223-
void gpt_params_handle_hf_token(gpt_params & params) {
224-
if (params.hf_token.empty() && std::getenv("HF_TOKEN")) {
225-
params.hf_token = std::getenv("HF_TOKEN");
226-
}
227-
}
228-
229258
void gpt_params_handle_model_default(gpt_params & params) {
230259
if (!params.hf_repo.empty()) {
231260
// short-hand to avoid specifying --hf-file -> default it to --model
@@ -273,7 +302,9 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
273302

274303
gpt_params_handle_model_default(params);
275304

276-
gpt_params_handle_hf_token(params);
305+
if (params.hf_token.empty()) {
306+
get_env("HF_TOKEN", params.hf_token);
307+
}
277308

278309
if (params.escape) {
279310
string_process_escapes(params.prompt);
@@ -293,6 +324,25 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
293324
return true;
294325
}
295326

327+
// Populate server-relevant fields of `params` from LLAMA_ARG_* environment
// variables. Every variable is optional: get_env leaves the corresponding
// field untouched when the variable is not set, so CLI-parsed values survive.
void gpt_params_parse_from_env(gpt_params & params) {
    // we only care about server-related params for now

    // model & generation
    get_env("LLAMA_ARG_MODEL",            params.model);
    get_env("LLAMA_ARG_CHAT_TEMPLATE",    params.chat_template);
    get_env("LLAMA_ARG_N_PREDICT",        params.n_predict);

    // compute / memory configuration
    get_env("LLAMA_ARG_THREADS",          params.n_threads);
    get_env("LLAMA_ARG_CTX_SIZE",         params.n_ctx);
    get_env("LLAMA_ARG_N_PARALLEL",       params.n_parallel);
    get_env("LLAMA_ARG_BATCH",            params.n_batch);
    get_env("LLAMA_ARG_UBATCH",           params.n_ubatch);
    get_env("LLAMA_ARG_N_GPU_LAYERS",     params.n_gpu_layers);
    get_env("LLAMA_ARG_FLASH_ATTN",       params.flash_attn);
    get_env("LLAMA_ARG_DEFRAG_THOLD",     params.defrag_thold);

    // HTTP server behaviour
    get_env("LLAMA_ARG_THREADS_HTTP",     params.n_threads_http);
    get_env("LLAMA_ARG_ENDPOINT_METRICS", params.endpoint_metrics);
    get_env("LLAMA_ARG_ENDPOINT_SLOTS",   params.endpoint_slots);
    get_env("LLAMA_ARG_EMBEDDINGS",       params.embedding);
}
345+
296346
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
297347
const auto params_org = params; // the example can modify the default params
298348

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ struct gpt_params {
267267
std::string lora_outfile = "ggml-lora-merged-f16.gguf";
268268
};
269269

270-
void gpt_params_handle_hf_token(gpt_params & params);
270+
void gpt_params_parse_from_env(gpt_params & params);
271271
void gpt_params_handle_model_default(gpt_params & params);
272272

273273
bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params);

examples/server/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,25 @@ logging:
247247
--log-append Don't truncate the old log file.
248248
```
249249

250+
Available environment variables (if specified, these variables will override parameters specified in arguments):
251+
252+
- `LLAMA_CACHE` (cache directory, used by `--hf-repo`)
253+
- `HF_TOKEN` (Hugging Face access token, used when accessing a gated model with `--hf-repo`)
254+
- `LLAMA_ARG_MODEL`
255+
- `LLAMA_ARG_THREADS`
256+
- `LLAMA_ARG_CTX_SIZE`
257+
- `LLAMA_ARG_N_PARALLEL`
258+
- `LLAMA_ARG_BATCH`
259+
- `LLAMA_ARG_UBATCH`
260+
- `LLAMA_ARG_N_GPU_LAYERS`
261+
- `LLAMA_ARG_THREADS_HTTP`
262+
- `LLAMA_ARG_CHAT_TEMPLATE`
263+
- `LLAMA_ARG_N_PREDICT`
264+
- `LLAMA_ARG_ENDPOINT_METRICS`
265+
- `LLAMA_ARG_ENDPOINT_SLOTS`
266+
- `LLAMA_ARG_EMBEDDINGS`
267+
- `LLAMA_ARG_FLASH_ATTN`
268+
- `LLAMA_ARG_DEFRAG_THOLD`
250269

251270
## Build
252271

examples/server/server.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2507,6 +2507,9 @@ int main(int argc, char ** argv) {
25072507
return 1;
25082508
}
25092509

2510+
// parse arguments from environment variables
2511+
gpt_params_parse_from_env(params);
2512+
25102513
// TODO: not great to use extern vars
25112514
server_log_json = params.log_json;
25122515
server_verbose = params.verbosity > 0;

0 commit comments

Comments
 (0)