diff --git a/Makefile b/Makefile index 9dc35410a2945..c1b63616b680d 100644 --- a/Makefile +++ b/Makefile @@ -796,7 +796,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) +server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/params.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2) diff --git a/build.zig b/build.zig index 96783574fe740..8ad48a3ab5bf4 100644 --- a/build.zig +++ b/build.zig @@ -141,7 +141,7 @@ pub fn build(b: *std.build.Builder) !void { server.linkSystemLibrary("ws2_32"); } - const server_assets = [_][]const u8{ "index.html", "index.js", "completion.js", "json-schema-to-grammar.mjs" }; + const server_assets = [_][]const u8{ "index.html", "index.js", "completion.js", "params.js", "json-schema-to-grammar.mjs" }; for (server_assets) |asset| { const input_path = b.fmt("examples/server/public/{s}", .{asset}); const output_path = b.fmt("examples/server/{s}.hpp", .{asset}); diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt index 4b89c53022fd0..937bf4b1d830f 100644 --- a/examples/server/CMakeLists.txt +++ b/examples/server/CMakeLists.txt @@ 
-11,6 +11,7 @@ set(PUBLIC_ASSETS index.html index.js completion.js + params.js json-schema-to-grammar.mjs ) foreach(asset ${PUBLIC_ASSETS}) diff --git a/examples/server/README.md b/examples/server/README.md index 918ac1295d660..4bd6ceb24904d 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -690,3 +690,12 @@ You can use html formatting if needed. ``` + +### Update default parameters + +Update the desired values in `examples/server/public/params.js` and pass the +`--path` flag to the server. For instance, + +```bash +--path /path/to/llama.cpp/examples/server/public/ +``` diff --git a/examples/server/public/index.html b/examples/server/public/index.html index 2961999f2451a..ba7a1a8c4caf7 100644 --- a/examples/server/public/index.html +++ b/examples/server/public/index.html @@ -203,44 +203,13 @@ import { llama } from './completion.js'; import { SchemaConverter } from './json-schema-to-grammar.mjs'; + + // load default session values and parameters + import { session, params } from './params.js'; + let selected_image = false; var slot_id = -1; - const session = signal({ - prompt: "This is a conversation between User and Llama, a friendly chatbot. 
Llama is helpful, kind, honest, good at writing, and never fails to answer any requests immediately and with precision.", - template: "{{prompt}}\n\n{{history}}\n{{char}}:", - historyTemplate: "{{name}}: {{message}}", - transcript: [], - type: "chat", // "chat" | "completion" - char: "Llama", - user: "User", - image_selected: '' - }) - - const params = signal({ - n_predict: 400, - temperature: 0.7, - repeat_last_n: 256, // 0 = disable penalty, -1 = context size - repeat_penalty: 1.18, // 1.0 = disabled - penalize_nl: false, - top_k: 40, // <= 0 to use vocab size - top_p: 0.95, // 1.0 = disabled - min_p: 0.05, // 0 = disabled - tfs_z: 1.0, // 1.0 = disabled - typical_p: 1.0, // 1.0 = disabled - presence_penalty: 0.0, // 0.0 = disabled - frequency_penalty: 0.0, // 0.0 = disabled - mirostat: 0, // 0/1/2 - mirostat_tau: 5, // target entropy - mirostat_eta: 0.1, // learning rate - grammar: '', - n_probs: 0, // no completion_probabilities, - min_keep: 0, // min probs from each sampler, - image_data: [], - cache_prompt: true, - api_key: '' - }) - /* START: Support for storing prompt templates and parameters in browsers LocalStorage */ const local_storage_storageKey = "llamacpp_server_local_storage"; diff --git a/examples/server/public/params.js b/examples/server/public/params.js new file mode 100644 index 0000000000000..df0ca4c310ab1 --- /dev/null +++ b/examples/server/public/params.js @@ -0,0 +1,36 @@ +import { signal } from './index.js'; + +export const session = signal({ + prompt: "This is a conversation between User and Llama, a friendly chatbot. 
Llama is helpful, kind, honest, good at writing, and never fails to answer any requests immediately and with precision.", + template: "{{prompt}}\n\n{{history}}\n{{char}}:", + historyTemplate: "{{name}}: {{message}}", + transcript: [], + type: "chat", // "chat" | "completion" + char: "Llama", + user: "User", + image_selected: '' +}) + +export const params = signal({ + n_predict: 400, + temperature: 0.7, + repeat_last_n: 256, // 0 = disable penalty, -1 = context size + repeat_penalty: 1.18, // 1.0 = disabled + penalize_nl: false, + top_k: 40, // <= 0 to use vocab size + top_p: 0.95, // 1.0 = disabled + min_p: 0.05, // 0 = disabled + tfs_z: 1.0, // 1.0 = disabled + typical_p: 1.0, // 1.0 = disabled + presence_penalty: 0.0, // 0.0 = disabled + frequency_penalty: 0.0, // 0.0 = disabled + mirostat: 0, // 0/1/2 + mirostat_tau: 5, // target entropy + mirostat_eta: 0.1, // learning rate + grammar: '', + n_probs: 0, // no completion_probabilities, + min_keep: 0, // min probs from each sampler, + image_data: [], + cache_prompt: true, + api_key: '' +}) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 2760aea8fd3e9..5ddb23bfb55d5 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -14,10 +14,11 @@ #include "httplib.h" #include "json.hpp" -// auto generated files (update with ./deps.sh) +// auto generated files (update during build time (#6661)) #include "index.html.hpp" #include "index.js.hpp" #include "completion.js.hpp" +#include "params.js.hpp" #include "json-schema-to-grammar.mjs.hpp" #include