
Commit d0e0d1a

JSON: [key] -> .at(key), assert() -> GGML_ASSERT
1 parent 3855416 commit d0e0d1a

4 files changed: +84 −74 lines changed
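
For readers unfamiliar with nlohmann::json, here is a minimal standalone sketch (not part of the diff) of the two behaviors this commit targets: operator[] silently default-inserts a missing key on a mutable object, and on a const object a missing key trips JSON_ASSERT, which is plain assert() unless overridden and therefore a no-op under NDEBUG. By contrast, .at() throws a catchable json::out_of_range either way.

    // Minimal sketch of the operator[] vs .at() difference (not from the diff).
    #include "ggml.h"                 // provides GGML_ASSERT
    #define JSON_ASSERT GGML_ASSERT   // as in this commit: must precede json.hpp
    #include "json.hpp"
    #include <cstdio>
    #include <string>

    int main() {
        nlohmann::json meta = {{"url", "https://example.invalid/model.gguf"}};

        meta["etag"] = "abc";      // operator[] inserts the missing key
        meta.at("etag") = "def";   // .at() requires the key to exist already

        try {
            std::string missing = meta.at("nope");  // throws json::out_of_range
        } catch (const nlohmann::json::out_of_range & e) {
            fprintf(stderr, "caught: %s\n", e.what());  // recoverable
        }

        // On a const object, operator[] cannot insert, so a missing key would
        // hit JSON_ASSERT instead of throwing:
        // const nlohmann::json & cmeta = meta;
        // std::string s = cmeta["nope"];  // asserts (or UB under NDEBUG)
        return 0;
    }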

common/common.cpp

Lines changed: 8 additions & 6 deletions

@@ -1,4 +1,6 @@
 #include "common.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
@@ -1964,18 +1966,18 @@ static bool llama_download_file(const std::string & url, const std::string & pat
     try {
         metadata_in >> metadata;
         fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
-        if (metadata.contains("url") && metadata["url"].is_string()) {
-            auto previous_url = metadata["url"].get<std::string>();
+        if (metadata.contains("url") && metadata.at("url").is_string()) {
+            auto previous_url = metadata.at("url").get<std::string>();
             if (previous_url != url) {
                 fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
                 return false;
             }
         }
-        if (metadata.contains("etag") && metadata["etag"].is_string()) {
-            etag = metadata["etag"];
+        if (metadata.contains("etag") && metadata.at("etag").is_string()) {
+            etag = metadata.at("etag");
         }
-        if (metadata.contains("lastModified") && metadata["lastModified"].is_string()) {
-            last_modified = metadata["lastModified"];
+        if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
+            last_modified = metadata.at("lastModified");
         }
     } catch (const nlohmann::json::exception & e) {
         fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());

common/json-schema-to-grammar.h

Lines changed: 4 additions & 0 deletions

@@ -1,4 +1,8 @@
 #pragma once
+
+#include "ggml.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 
 std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
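
One detail worth noting: JSON_ASSERT is read while the preprocessor expands json.hpp, so the #define only takes effect when it textually precedes the include. That is why the commit repeats the define at every #include "json.hpp" site rather than centralizing it. A sketch of the right and wrong orderings:

    // Correct: the override is visible when json.hpp is expanded.
    #include "ggml.h"                  // provides GGML_ASSERT
    #define JSON_ASSERT GGML_ASSERT
    #include "json.hpp"

    // Wrong: json.hpp has already fallen back to its default assert()-based
    // JSON_ASSERT by the time this define appears; it silently does nothing.
    // #include "json.hpp"
    // #define JSON_ASSERT GGML_ASSERT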

examples/server/server.cpp

Lines changed: 52 additions & 50 deletions

@@ -12,6 +12,8 @@
 // increase max payload length to allow use of larger context size
 #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
 #include "httplib.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 
 // auto generated files (update with ./deps.sh)
@@ -745,7 +747,7 @@ struct server_context {
         }
 
         default_generation_settings_for_props = get_formated_generation(slots.front());
-        default_generation_settings_for_props["seed"] = -1;
+        default_generation_settings_for_props.at("seed") = -1;
 
         // the update_slots() logic will always submit a maximum of n_batch tokens
         // note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
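
A consequence of using .at() on the left-hand side, as in the "seed" line above: operator[] inserts a missing key on assignment, while .at() throws if the key is absent, so key typos surface as exceptions instead of silently creating new entries. A minimal sketch with hypothetical keys (not from this diff):

    #include "json.hpp"

    int main() {
        nlohmann::json settings = {{"seed", 42}};

        settings["seed"]    = -1;      // overwrites; would also insert if absent
        settings.at("seed") = -1;      // overwrites; the key must already exist

        try {
            settings.at("sede") = -1;  // typo: throws json::out_of_range
        } catch (const nlohmann::json::out_of_range &) {
            // with operator[], the typo would silently create a "sede" key
        }
        return 0;
    }
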
@@ -859,7 +861,7 @@ struct server_context {
         slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
 
         // process "json_schema" and "grammar"
-        if (data.contains("json_schema") && !data["json_schema"].is_null() && data.contains("grammar") && !data["grammar"].is_null()) {
+        if (data.contains("json_schema") && !data.at("json_schema").is_null() && data.contains("grammar") && !data.at("grammar").is_null()) {
             send_error(task, "Either \"json_schema\" or \"grammar\" can be specified, but not both", ERROR_TYPE_INVALID_REQUEST);
             return false;
         } else if (data.contains("json_schema") && !data.contains("grammar")) {
@@ -1343,12 +1345,12 @@ struct server_context {
             }
             slot.n_sent_token_probs = probs_stop_pos;
 
-            res.data["completion_probabilities"] = probs_vector_to_json(ctx, probs_output);
+            res.data.at("completion_probabilities") = probs_vector_to_json(ctx, probs_output);
         }
 
         if (slot.oaicompat) {
-            res.data["oaicompat_token_ctr"] = slot.n_decoded;
-            res.data["model"] = slot.oaicompat_model;
+            res.data.at("oaicompat_token_ctr") = slot.n_decoded;
+            res.data.at("model") = slot.oaicompat_model;
         }
 
         queue_results.send(res);
@@ -1393,12 +1395,12 @@ struct server_context {
                 slot.generated_token_probs.end());
             }
 
-            res.data["completion_probabilities"] = probs_vector_to_json(ctx, probs);
+            res.data.at("completion_probabilities") = probs_vector_to_json(ctx, probs);
         }
 
         if (slot.oaicompat) {
-            res.data["oaicompat_token_ctr"] = slot.n_decoded;
-            res.data["model"] = slot.oaicompat_model;
+            res.data.at("oaicompat_token_ctr") = slot.n_decoded;
+            res.data.at("model") = slot.oaicompat_model;
         }
 
         queue_results.send(res);
@@ -1512,7 +1514,7 @@ struct server_context {
         // add subtasks
         for (int i = 0; i < prompt_count; i++) {
             json subtask_data = multiprompt_task.data;
-            subtask_data["prompt"] = subtask_data["prompt"][i];
+            subtask_data.at("prompt") = subtask_data.at("prompt")[i];
 
             // subtasks inherit everything else (infill mode, embedding mode, etc.)
             request_completion(subtask_ids[i], id_multi, subtask_data, multiprompt_task.infill, multiprompt_task.embedding);
@@ -1532,7 +1534,7 @@ struct server_context {
                 }
 
                 if (task.data.contains("system_prompt")) {
-                    system_prompt_set(task.data["system_prompt"]);
+                    system_prompt_set(task.data.at("system_prompt"));
 
                     for (server_slot & slot : slots) {
                         slot.n_past = 0;
@@ -1575,11 +1577,11 @@ struct server_context {
 
                 for (server_slot & slot : slots) {
                     json slot_data = get_formated_generation(slot);
-                    slot_data["id"] = slot.id;
-                    slot_data["id_task"] = slot.id_task;
-                    slot_data["state"] = slot.state;
-                    slot_data["prompt"] = slot.prompt;
-                    slot_data["next_token"] = {
+                    slot_data.at("id") = slot.id;
+                    slot_data.at("id_task") = slot.id_task;
+                    slot_data.at("state") = slot.state;
+                    slot_data.at("prompt") = slot.prompt;
+                    slot_data.at("next_token") = {
                         {"has_next_token", slot.has_next_token},
                         {"n_remain", slot.n_remaining},
                         {"n_decoded", slot.n_decoded},
@@ -1589,7 +1591,7 @@ struct server_context {
                         {"stopping_word", slot.stopping_word},
                     };
 
-                    if (slot_data["state"] == SLOT_STATE_IDLE) {
+                    if (slot_data.at("state") == SLOT_STATE_IDLE) {
                         n_idle_slots++;
                     } else {
                         n_processing_slots++;
@@ -1644,7 +1646,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_SAVE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -1654,8 +1656,8 @@ struct server_context {
                    const size_t token_count = slot->cache_tokens.size();
                    const int64_t t_start = ggml_time_us();
 
-                    std::string filename = task.data["filename"];
-                    std::string filepath = task.data["filepath"];
+                    std::string filename = task.data.at("filename");
+                    std::string filepath = task.data.at("filepath");
 
                    const size_t nwrite = llama_state_seq_save_file(ctx, filepath.c_str(), slot->id + 1, slot->cache_tokens.data(), token_count);
 
@@ -1679,7 +1681,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_RESTORE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -1688,8 +1690,8 @@ struct server_context {
 
                    const int64_t t_start = ggml_time_us();
 
-                    std::string filename = task.data["filename"];
-                    std::string filepath = task.data["filepath"];
+                    std::string filename = task.data.at("filename");
+                    std::string filepath = task.data.at("filepath");
 
                    slot->cache_tokens.resize(slot->n_ctx);
                    size_t token_count = 0;
@@ -1721,7 +1723,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_ERASE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -3136,8 +3138,8 @@ int main(int argc, char ** argv) {
             server_task_result result = ctx_server.queue_results.recv(task.id);
             ctx_server.queue_results.remove_waiting_task_id(task.id);
 
-            const int n_idle_slots = result.data["idle"];
-            const int n_processing_slots = result.data["processing"];
+            const int n_idle_slots = result.data.at("idle");
+            const int n_processing_slots = result.data.at("processing");
 
             json health = {
                 {"status", "ok"},
@@ -3147,11 +3149,11 @@ int main(int argc, char ** argv) {
 
             res.status = 200; // HTTP OK
             if (sparams.slots_endpoint && req.has_param("include_slots")) {
-                health["slots"] = result.data["slots"];
+                health.at("slots") = result.data.at("slots");
             }
 
             if (n_idle_slots == 0) {
-                health["status"] = "no slot available";
+                health.at("status") = "no slot available";
                 if (req.has_param("fail_on_no_slot")) {
                     res.status = 503; // HTTP Service Unavailable
                 }
@@ -3191,7 +3193,7 @@ int main(int argc, char ** argv) {
             server_task_result result = ctx_server.queue_results.recv(task.id);
             ctx_server.queue_results.remove_waiting_task_id(task.id);
 
-            res.set_content(result.data["slots"].dump(), "application/json");
+            res.set_content(result.data.at("slots").dump(), "application/json");
             res.status = 200; // HTTP OK
         };
 
@@ -3218,32 +3220,32 @@ int main(int argc, char ** argv) {
 
             json data = result.data;
 
-            const uint64_t n_prompt_tokens_processed = data["n_prompt_tokens_processed"];
-            const uint64_t t_prompt_processing = data["t_prompt_processing"];
+            const uint64_t n_prompt_tokens_processed = data.at("n_prompt_tokens_processed");
+            const uint64_t t_prompt_processing = data.at("t_prompt_processing");
 
-            const uint64_t n_tokens_predicted = data["n_tokens_predicted"];
-            const uint64_t t_tokens_generation = data["t_tokens_generation"];
+            const uint64_t n_tokens_predicted = data.at("n_tokens_predicted");
+            const uint64_t t_tokens_generation = data.at("t_tokens_generation");
 
-            const int32_t kv_cache_used_cells = data["kv_cache_used_cells"];
+            const int32_t kv_cache_used_cells = data.at("kv_cache_used_cells");
 
             // metrics definition: https://prometheus.io/docs/practices/naming/#metric-names
             json all_metrics_def = json {
                 {"counter", {{
                         {"name",  "prompt_tokens_total"},
                         {"help",  "Number of prompt tokens processed."},
-                        {"value", (uint64_t) data["n_prompt_tokens_processed_total"]}
+                        {"value", (uint64_t) data.at("n_prompt_tokens_processed_total")}
                 }, {
                         {"name",  "prompt_seconds_total"},
                         {"help",  "Prompt process time"},
-                        {"value", (uint64_t) data["t_prompt_processing_total"] / 1.e3}
+                        {"value", (uint64_t) data.at("t_prompt_processing_total") / 1.e3}
                 }, {
                         {"name",  "tokens_predicted_total"},
                         {"help",  "Number of generation tokens processed."},
-                        {"value", (uint64_t) data["n_tokens_predicted_total"]}
+                        {"value", (uint64_t) data.at("n_tokens_predicted_total")}
                 }, {
                         {"name",  "tokens_predicted_seconds_total"},
                         {"help",  "Predict process time"},
-                        {"value", (uint64_t) data["t_tokens_generation_total"] / 1.e3}
+                        {"value", (uint64_t) data.at("t_tokens_generation_total") / 1.e3}
                 }}},
                 {"gauge", {{
                         {"name",  "prompt_tokens_seconds"},
@@ -3260,15 +3262,15 @@ int main(int argc, char ** argv) {
                 },{
                         {"name",  "kv_cache_tokens"},
                         {"help",  "KV-cache tokens."},
-                        {"value", (uint64_t) data["kv_cache_tokens_count"]}
+                        {"value", (uint64_t) data.at("kv_cache_tokens_count")}
                 },{
                         {"name",  "requests_processing"},
                         {"help",  "Number of request processing."},
-                        {"value", (uint64_t) data["processing"]}
+                        {"value", (uint64_t) data.at("processing")}
                 },{
                         {"name",  "requests_deferred"},
                         {"help",  "Number of request deferred."},
-                        {"value", (uint64_t) data["deferred"]}
+                        {"value", (uint64_t) data.at("deferred")}
                 }}}
             };
 
@@ -3279,8 +3281,8 @@ int main(int argc, char ** argv) {
             const auto & metrics_def = el.value();
 
             for (const auto & metric_def : metrics_def) {
-                const std::string name = metric_def["name"];
-                const std::string help = metric_def["help"];
+                const std::string name = metric_def.at("name");
+                const std::string help = metric_def.at("help");
 
                 auto value = json_value(metric_def, "value", 0.);
                 prometheus << "# HELP llamacpp:" << name << " " << help << "\n"
@@ -3289,7 +3291,7 @@ int main(int argc, char ** argv) {
             }
         }
 
-        const int64_t t_start = data["t_start"];
+        const int64_t t_start = data.at("t_start");
         res.set_header("Process-Start-Time-Unix", std::to_string(t_start));
 
         res.set_content(prometheus.str(), "text/plain; version=0.0.4");
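
Note that the json_value(metric_def, "value", 0.) call above is left untouched: a helper of that shape already wraps the presence check and a default, so it has no bare operator[] to replace. A hedged sketch of how such a helper might look (the real one lives elsewhere in the server sources and may differ):

    #include <string>
    #include "json.hpp"

    // Hypothetical sketch of a json_value-style helper; not necessarily the
    // server's actual implementation.
    template <typename T>
    static T json_value(const nlohmann::json & body, const std::string & key, const T & default_value) {
        // Absent or null keys fall back to the default; present keys are read
        // through .at(), so a type mismatch still throws rather than asserting.
        return body.contains(key) && !body.at(key).is_null()
            ? body.at(key).get<T>()
            : default_value;
    }
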
@@ -3298,7 +3300,7 @@ int main(int argc, char ** argv) {
 
     const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
         json request_data = json::parse(req.body);
-        std::string filename = request_data["filename"];
+        std::string filename = request_data.at("filename");
         if (!validate_file_name(filename)) {
             res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3328,7 +3330,7 @@ int main(int argc, char ** argv) {
 
     const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
         json request_data = json::parse(req.body);
-        std::string filename = request_data["filename"];
+        std::string filename = request_data.at("filename");
         if (!validate_file_name(filename)) {
             res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3647,7 +3649,7 @@ int main(int argc, char ** argv) {
 
         std::vector<llama_token> tokens;
         if (body.count("content") != 0) {
-            tokens = ctx_server.tokenize(body["content"], false);
+            tokens = ctx_server.tokenize(body.at("content"), false);
         }
         const json data = format_tokenizer_response(tokens);
         return res.set_content(data.dump(), "application/json; charset=utf-8");
@@ -3659,7 +3661,7 @@ int main(int argc, char ** argv) {
 
         std::string content;
         if (body.count("tokens") != 0) {
-            const std::vector<llama_token> tokens = body["tokens"];
+            const std::vector<llama_token> tokens = body.at("tokens");
             content = tokens_to_str(ctx_server.ctx, tokens.cbegin(), tokens.cend());
         }
 
@@ -3682,10 +3684,10 @@ int main(int argc, char ** argv) {
         json prompt;
         if (body.count("input") != 0) {
             is_openai = true;
-            prompt = body["input"];
+            prompt = body.at("input");
         } else if (body.count("content") != 0) {
             // with "content", we only support single prompt
-            prompt = std::vector<std::string>{body["content"]};
+            prompt = std::vector<std::string>{body.at("content")};
         } else {
             res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3704,7 +3706,7 @@ int main(int argc, char ** argv) {
         if (!result.error) {
             if (result.data.count("results")) {
                 // result for multi-task
-                responses = result.data["results"];
+                responses = result.data.at("results");
             } else {
                 // result for single task
                 responses = std::vector<json>{result.data};
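
The request-body hunks here and in the tokenize/detokenize handlers above share one guard-then-read shape: check presence with count() or contains(), then read through .at(), so absent keys are handled explicitly and present-but-malformed ones throw into the surrounding error handling. A standalone sketch of that shape (hypothetical helper, with llama_token simplified to int):

    #include <vector>
    #include "json.hpp"

    // Hypothetical standalone sketch of the guard-then-read pattern above.
    static std::vector<int> extract_tokens(const nlohmann::json & body) {
        if (body.count("tokens") != 0) {                       // presence: explicit check
            return body.at("tokens").get<std::vector<int>>();  // type errors throw
        }
        return {};                                             // absence: explicit default
    }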
