Skip to content

Commit 5c0677a

Browse files
ggerganov authored and Silver267 committed
server : passthrough the /models endpoint during loading (ggml-org#13535)
* server : passthrough the /models endpoint during loading
* server : update readme + return json for "meta" field
1 parent 3be533d commit 5c0677a

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

tools/server/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1052,7 +1052,7 @@ To know the `id` of the adapter, use GET `/lora-adapters`
10521052

10531053
Returns information about the loaded model. See [OpenAI Models API documentation](https://platform.openai.com/docs/api-reference/models).
10541054

1055-
The returned list always has one single element.
1055+
The returned list always has one single element. The `meta` field can be `null` (for example, while the model is still loading).
10561056

10571057
By default, model `id` field is the path to model file, specified via `-m`. You can set a custom value for model `id` field via `--alias` argument. For example, `--alias gpt-4o-mini`.
10581058

tools/server/server.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3711,6 +3711,9 @@ int main(int argc, char ** argv) {
37113711
if (req.path == "/" || tmp.back() == "html") {
37123712
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
37133713
res.status = 503;
3714+
} else if (req.path == "/models" || req.path == "/v1/models") {
3715+
// allow the models endpoint to be accessed during loading
3716+
return true;
37143717
} else {
37153718
res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
37163719
}
@@ -4369,7 +4372,13 @@ int main(int argc, char ** argv) {
43694372
res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
43704373
};
43714374

4372-
const auto handle_models = [&params, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
4375+
const auto handle_models = [&params, &ctx_server, &state, &res_ok](const httplib::Request &, httplib::Response & res) {
4376+
server_state current_state = state.load();
4377+
json model_meta = nullptr;
4378+
if (current_state == SERVER_STATE_READY) {
4379+
model_meta = ctx_server.model_meta();
4380+
}
4381+
43734382
json models = {
43744383
{"object", "list"},
43754384
{"data", {
@@ -4378,7 +4387,7 @@ int main(int argc, char ** argv) {
43784387
{"object", "model"},
43794388
{"created", std::time(0)},
43804389
{"owned_by", "llamacpp"},
4381-
{"meta", ctx_server.model_meta()}
4390+
{"meta", model_meta},
43824391
},
43834392
}}
43844393
};

0 commit comments

Comments
 (0)