From d3f5a5828107e1334f75bd82697c7e36908bc0b1 Mon Sep 17 00:00:00 2001 From: 4onen Date: Sat, 12 Apr 2025 11:03:20 -0700 Subject: [PATCH 1/5] Add --override-tensors option to llama-bench --- examples/llama-bench/llama-bench.cpp | 69 ++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index cbcbfcee861ee..7cab877abb6d3 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -181,6 +181,7 @@ struct cmd_params { int reps; ggml_sched_priority prio; int delay; + std::vector tensor_buft_overrides; bool verbose; bool progress; output_formats output_format; @@ -213,6 +214,7 @@ static const cmd_params cmd_params_defaults = { /* reps */ 5, /* prio */ GGML_SCHED_PRIO_NORMAL, /* delay */ 0, + /* tensor_buft_overrides*/ {}, /* verbose */ false, /* progress */ false, /* output_format */ MARKDOWN, @@ -268,6 +270,7 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -r, --repetitions (default: %d)\n", cmd_params_defaults.reps); printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio); printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay); + printf(" -ot --override-tensors =,... 
(default:disabled)\n"); printf(" -o, --output (default: %s)\n", output_format_str(cmd_params_defaults.output_format)); printf(" -oe, --output-err (default: %s)\n", @@ -575,6 +578,56 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { break; } params.delay = std::stoi(argv[i]); + } else if (arg == "-ot" || arg == "--override-tensors") { + if (++i >= argc) { + invalid_param = true; + break; + } + auto value = argv[i]; + /* static */ std::map buft_list; + if (buft_list.empty()) { + // enumerate all the devices and add their buffer types to the list + for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { + auto * dev = ggml_backend_dev_get(i); + auto * buft = ggml_backend_dev_buffer_type(dev); + if (buft) { + buft_list[ggml_backend_buft_name(buft)] = buft; + } + } + } + auto override_span_len = std::strcspn(value, ","); + while (override_span_len > 0) { + // Stamps null terminators into the argv + // value for this option to avoid the + // memory leak present in the implementation + // over in arg.cpp. Maybe allowable because we + // only parse these args once in this program. 
+ auto override = value; + if (value[override_span_len] != '\0') { + value[override_span_len] = '\0'; + value = &value[override_span_len + 1]; + } else { + value = &value[override_span_len]; + } + auto tensor_name_span_len = std::strcspn(override, "="); + if (tensor_name_span_len >= override_span_len) { + invalid_param = true; + break; + } + override[tensor_name_span_len] = '\0'; + auto tensor_name = override; + auto buffer_type = &override[tensor_name_span_len + 1]; + if (buft_list.find(buffer_type) == buft_list.end()) { + printf("Available buffer types:\n"); + for (const auto & it : buft_list) { + printf(" %s\n", ggml_backend_buft_name(it.second)); + } + invalid_param = true; + break; + } + params.tensor_buft_overrides.push_back({tensor_name, buft_list.at(buffer_type)}); + override_span_len = std::strcspn(value, ","); + } } else if (arg == "-o" || arg == "--output") { if (++i >= argc) { invalid_param = true; @@ -667,6 +720,11 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { params.poll = cmd_params_defaults.poll; } + // Attach terminators to options that requre them + if (!params.tensor_buft_overrides.empty()) { + params.tensor_buft_overrides.push_back({nullptr, nullptr}); + } + return params; } @@ -689,6 +747,7 @@ struct cmd_params_instance { bool no_kv_offload; bool flash_attn; std::vector tensor_split; + std::vector tensor_buft_overrides; bool use_mmap; bool embeddings; @@ -733,6 +792,13 @@ struct cmd_params_instance { mparams.tensor_split = tensor_split.data(); mparams.use_mmap = use_mmap; + if (tensor_buft_overrides.empty()) { + mparams.tensor_buft_overrides = nullptr; + } else { + GGML_ASSERT(tensor_buft_overrides.back().pattern == nullptr && "Tensor buffer overrides not terminated with empty pattern"); + mparams.tensor_buft_overrides = tensor_buft_overrides.data(); + } + return mparams; } @@ -804,6 +870,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .no_kv_offload= */ nkvo, /* .flash_attn = */ fa, /* .tensor_split = */ 
ts, + /* .tensor_buft_overrides = */ params.tensor_buft_overrides, /* .use_mmap = */ mmp, /* .embeddings = */ embd, }; @@ -833,6 +900,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .no_kv_offload= */ nkvo, /* .flash_attn = */ fa, /* .tensor_split = */ ts, + /* .tensor_buft_overrides = */ params.tensor_buft_overrides, /* .use_mmap = */ mmp, /* .embeddings = */ embd, }; @@ -862,6 +930,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .no_kv_offload= */ nkvo, /* .flash_attn = */ fa, /* .tensor_split = */ ts, + /* .tensor_buft_overrides = */ params.tensor_buft_overrides, /* .use_mmap = */ mmp, /* .embeddings = */ embd, }; From c326b6266f9c9ad6f2fb0c74dc939278f2817c1f Mon Sep 17 00:00:00 2001 From: 4onen Date: Tue, 15 Apr 2025 12:40:03 -0700 Subject: [PATCH 2/5] Correct llama-bench --override-tensors to --override-tensor --- examples/llama-bench/llama-bench.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 7cab877abb6d3..8115b52e13b92 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -578,7 +578,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { break; } params.delay = std::stoi(argv[i]); - } else if (arg == "-ot" || arg == "--override-tensors") { + } else if (arg == "-ot" || arg == "--override-tensor") { if (++i >= argc) { invalid_param = true; break; From 833d11ea2e506d8a14a0e5c4825f418cb4a422dc Mon Sep 17 00:00:00 2001 From: 4onen Date: Tue, 15 Apr 2025 13:50:49 -0700 Subject: [PATCH 3/5] llama-bench: Update --override-tensors parsing to match --tensor-split, appear in test matrix. 
--- examples/llama-bench/llama-bench.cpp | 191 +++++++++++++++++++-------- 1 file changed, 138 insertions(+), 53 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 8115b52e13b92..f6e899ef15b2b 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -36,6 +36,46 @@ static uint64_t get_time_ns() { return std::chrono::nanoseconds(clock::now().time_since_epoch()).count(); } +bool tensor_buft_override_equal(const llama_model_tensor_buft_override& a, const llama_model_tensor_buft_override& b) { + if (a.pattern != b.pattern) { + // cString comparison that may be null + if (a.pattern == nullptr || b.pattern == nullptr) { + return false; + } + if (strcmp(a.pattern, b.pattern) != 0) { + return false; + } + } + if (a.buft != b.buft) { + return false; + } + return true; +} + +bool vec_tensor_buft_override_equal(const std::vector& a, const std::vector& b) { + if (a.size() != b.size()) { + return false; + } + for (size_t i = 0; i < a.size(); i++) { + if (!tensor_buft_override_equal(a[i], b[i])) { + return false; + } + } + return true; +} + +bool vec_vec_tensor_buft_override_equal(const std::vector>& a, const std::vector>& b) { + if (a.size() != b.size()) { + return false; + } + for (size_t i = 0; i < a.size(); i++) { + if (!vec_tensor_buft_override_equal(a[i], b[i])) { + return false; + } + } + return true; +} + template static std::string join(const std::vector & values, const std::string & delim) { std::ostringstream str; for (size_t i = 0; i < values.size(); i++) { @@ -175,13 +215,13 @@ struct cmd_params { std::vector no_kv_offload; std::vector flash_attn; std::vector> tensor_split; + std::vector> tensor_buft_overrides; std::vector use_mmap; std::vector embeddings; ggml_numa_strategy numa; int reps; ggml_sched_priority prio; int delay; - std::vector tensor_buft_overrides; bool verbose; bool progress; output_formats output_format; @@ -208,13 +248,13 @@ static const cmd_params 
cmd_params_defaults = { /* no_kv_offload */ { false }, /* flash_attn */ { false }, /* tensor_split */ { std::vector(llama_max_devices(), 0.0f) }, + /* tensor_buft_overrides*/ {}, /* use_mmap */ { true }, /* embeddings */ { false }, /* numa */ GGML_NUMA_STRATEGY_DISABLED, /* reps */ 5, /* prio */ GGML_SCHED_PRIO_NORMAL, /* delay */ 0, - /* tensor_buft_overrides*/ {}, /* verbose */ false, /* progress */ false, /* output_format */ MARKDOWN, @@ -267,10 +307,10 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str()); printf(" -ts, --tensor-split (default: 0)\n"); + printf(" -ot --override-tensors =;... (default:disabled)\n"); printf(" -r, --repetitions (default: %d)\n", cmd_params_defaults.reps); printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio); printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay); - printf(" -ot --override-tensors =,... (default:disabled)\n"); printf(" -o, --output (default: %s)\n", output_format_str(cmd_params_defaults.output_format)); printf(" -oe, --output-err (default: %s)\n", @@ -560,24 +600,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { } params.tensor_split.push_back(tensor_split); } - } else if (arg == "-r" || arg == "--repetitions") { - if (++i >= argc) { - invalid_param = true; - break; - } - params.reps = std::stoi(argv[i]); - } else if (arg == "--prio") { - if (++i >= argc) { - invalid_param = true; - break; - } - params.prio = (enum ggml_sched_priority) std::stoi(argv[i]); - } else if (arg == "--delay") { - if (++i >= argc) { - invalid_param = true; - break; - } - params.delay = std::stoi(argv[i]); } else if (arg == "-ot" || arg == "--override-tensor") { if (++i >= argc) { invalid_param = true; @@ -595,39 +617,73 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { } } } - auto override_span_len = std::strcspn(value, ","); - while (override_span_len > 0) { + 
auto override_group_span_len = std::strcspn(value, ","); + while (override_group_span_len > 0) { // Stamps null terminators into the argv // value for this option to avoid the // memory leak present in the implementation - // over in arg.cpp. Maybe allowable because we + // over in arg.cpp. Acceptable because we // only parse these args once in this program. - auto override = value; - if (value[override_span_len] != '\0') { - value[override_span_len] = '\0'; - value = &value[override_span_len + 1]; + auto override_group = value; + if (value[override_group_span_len] != '\0') { + value[override_group_span_len] = '\0'; + value = &value[override_group_span_len + 1]; } else { - value = &value[override_span_len]; - } - auto tensor_name_span_len = std::strcspn(override, "="); - if (tensor_name_span_len >= override_span_len) { - invalid_param = true; - break; + value = &value[override_group_span_len]; } - override[tensor_name_span_len] = '\0'; - auto tensor_name = override; - auto buffer_type = &override[tensor_name_span_len + 1]; - if (buft_list.find(buffer_type) == buft_list.end()) { - printf("Available buffer types:\n"); - for (const auto & it : buft_list) { - printf(" %s\n", ggml_backend_buft_name(it.second)); + std::vector group_tensor_buft_overrides{}; + auto override_span_len = std::strcspn(override_group, ";"); + while (override_span_len > 0) { + auto override = override_group; + if (override_group[override_span_len] != '\0') { + override_group[override_span_len] = '\0'; + override_group = &override_group[override_span_len + 1]; + } else { + override_group = &override_group[override_span_len]; } - invalid_param = true; + auto tensor_name_span_len = std::strcspn(override, "="); + if (tensor_name_span_len >= override_span_len) { + invalid_param = true; + break; + } + override[tensor_name_span_len] = '\0'; + auto tensor_name = override; + auto buffer_type = &override[tensor_name_span_len + 1]; + if (buft_list.find(buffer_type) == buft_list.end()) { + printf("Available 
buffer types:\n"); + for (const auto & it : buft_list) { + printf(" %s\n", ggml_backend_buft_name(it.second)); + } + invalid_param = true; + break; + } + group_tensor_buft_overrides.push_back({tensor_name, buft_list.at(buffer_type)}); + override_span_len = std::strcspn(override_group, ";"); + } + if (invalid_param) { break; } - params.tensor_buft_overrides.push_back({tensor_name, buft_list.at(buffer_type)}); - override_span_len = std::strcspn(value, ","); + params.tensor_buft_overrides.push_back(group_tensor_buft_overrides); + override_group_span_len = std::strcspn(value, ","); + } + } else if (arg == "-r" || arg == "--repetitions") { + if (++i >= argc) { + invalid_param = true; + break; } + params.reps = std::stoi(argv[i]); + } else if (arg == "--prio") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.prio = (enum ggml_sched_priority) std::stoi(argv[i]); + } else if (arg == "--delay") { + if (++i >= argc) { + invalid_param = true; + break; + } + params.delay = std::stoi(argv[i]); } else if (arg == "-o" || arg == "--output") { if (++i >= argc) { invalid_param = true; @@ -701,6 +757,9 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { if (params.tensor_split.empty()) { params.tensor_split = cmd_params_defaults.tensor_split; } + if (params.tensor_buft_overrides.empty()) { + params.tensor_buft_overrides = cmd_params_defaults.tensor_buft_overrides; + } if (params.use_mmap.empty()) { params.use_mmap = cmd_params_defaults.use_mmap; } @@ -721,8 +780,10 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { } // Attach terminators to options that requre them - if (!params.tensor_buft_overrides.empty()) { - params.tensor_buft_overrides.push_back({nullptr, nullptr}); + for (auto &tensor_buft_override_list : params.tensor_buft_overrides) { + if (!tensor_buft_override_list.empty()) { + tensor_buft_override_list.push_back({nullptr, nullptr}); + } } return params; @@ -805,7 +866,7 @@ struct cmd_params_instance { bool equal_mparams(const 
cmd_params_instance & other) const { return model == other.model && n_gpu_layers == other.n_gpu_layers && rpc_servers_str == other.rpc_servers_str && split_mode == other.split_mode && main_gpu == other.main_gpu && use_mmap == other.use_mmap && - tensor_split == other.tensor_split; + tensor_split == other.tensor_split && vec_tensor_buft_override_equal(tensor_buft_overrides, other.tensor_buft_overrides); } llama_context_params to_llama_cparams() const { @@ -835,6 +896,7 @@ static std::vector get_cmd_params_instances(const cmd_param for (const auto & sm : params.split_mode) for (const auto & mg : params.main_gpu) for (const auto & ts : params.tensor_split) + for (const auto & ot : params.tensor_buft_overrides) for (const auto & mmp : params.use_mmap) for (const auto & embd : params.embeddings) for (const auto & nb : params.n_batch) @@ -870,7 +932,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .no_kv_offload= */ nkvo, /* .flash_attn = */ fa, /* .tensor_split = */ ts, - /* .tensor_buft_overrides = */ params.tensor_buft_overrides, + /* .tensor_buft_overrides = */ ot, /* .use_mmap = */ mmp, /* .embeddings = */ embd, }; @@ -900,7 +962,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .no_kv_offload= */ nkvo, /* .flash_attn = */ fa, /* .tensor_split = */ ts, - /* .tensor_buft_overrides = */ params.tensor_buft_overrides, + /* .tensor_buft_overrides = */ ot, /* .use_mmap = */ mmp, /* .embeddings = */ embd, }; @@ -930,7 +992,7 @@ static std::vector get_cmd_params_instances(const cmd_param /* .no_kv_offload= */ nkvo, /* .flash_attn = */ fa, /* .tensor_split = */ ts, - /* .tensor_buft_overrides = */ params.tensor_buft_overrides, + /* .tensor_buft_overrides = */ ot, /* .use_mmap = */ mmp, /* .embeddings = */ embd, }; @@ -965,6 +1027,7 @@ struct test { bool no_kv_offload; bool flash_attn; std::vector tensor_split; + std::vector tensor_buft_overrides; bool use_mmap; bool embeddings; int n_prompt; @@ -996,6 +1059,7 @@ struct test { 
no_kv_offload = inst.no_kv_offload; flash_attn = inst.flash_attn; tensor_split = inst.tensor_split; + tensor_buft_overrides = inst.tensor_buft_overrides; use_mmap = inst.use_mmap; embeddings = inst.embeddings; n_prompt = inst.n_prompt; @@ -1041,9 +1105,9 @@ struct test { "build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads", "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers", - "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "use_mmap", - "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", "stddev_ns", - "avg_ts", "stddev_ts", + "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "tensor_buft_overrides", + "use_mmap", "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", + "stddev_ns", "avg_ts", "stddev_ts", }; return fields; } @@ -1069,6 +1133,7 @@ struct test { std::vector get_values() const { std::string tensor_split_str; + std::string tensor_buft_overrides_str; int max_nonzero = 0; for (size_t i = 0; i < llama_max_devices(); i++) { if (tensor_split[i] > 0) { @@ -1083,6 +1148,19 @@ struct test { tensor_split_str += "/"; } } + for (int i = 0; i < tensor_buft_overrides.size()-1; i++) { + // Last element of tensor_buft_overrides is always a null pattern + if (tensor_buft_overrides[i].pattern == nullptr) { + tensor_buft_overrides_str += "none"; + } else { + tensor_buft_overrides_str += tensor_buft_overrides[i].pattern; + tensor_buft_overrides_str += "="; + tensor_buft_overrides_str += ggml_backend_buft_name(tensor_buft_overrides[i].buft); + } + if (i + 2 < tensor_buft_overrides.size()) { + tensor_buft_overrides_str += ";"; + } + } std::vector values = { build_commit, std::to_string(build_number), cpu_info, @@ -1106,6 +1184,7 @@ struct test { std::to_string(no_kv_offload), std::to_string(flash_attn), tensor_split_str, + tensor_buft_overrides_str, std::to_string(use_mmap), 
std::to_string(embeddings), std::to_string(n_prompt), @@ -1323,6 +1402,9 @@ struct markdown_printer : public printer { if (field == "tensor_split") { return "ts"; } + if (field == "tensor_buft_overrides") { + return "ot"; + } return field; } @@ -1376,6 +1458,9 @@ struct markdown_printer : public printer { if (params.tensor_split.size() > 1 || params.tensor_split != cmd_params_defaults.tensor_split) { fields.emplace_back("tensor_split"); } + if (params.tensor_buft_overrides.size() > 1 || !vec_vec_tensor_buft_override_equal(params.tensor_buft_overrides, cmd_params_defaults.tensor_buft_overrides)) { + fields.emplace_back("tensor_buft_overrides"); + } if (params.use_mmap.size() > 1 || params.use_mmap != cmd_params_defaults.use_mmap) { fields.emplace_back("use_mmap"); } From 405224a51c4d14dbaef4923bc3aad08776b59432 Mon Sep 17 00:00:00 2001 From: Matthew Dupree Date: Wed, 16 Apr 2025 09:14:56 -0700 Subject: [PATCH 4/5] Make new llama-bench util functions static to fix Ubuntu CI --- examples/llama-bench/llama-bench.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index f6e899ef15b2b..d5635be92342e 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -36,7 +36,7 @@ static uint64_t get_time_ns() { return std::chrono::nanoseconds(clock::now().time_since_epoch()).count(); } -bool tensor_buft_override_equal(const llama_model_tensor_buft_override& a, const llama_model_tensor_buft_override& b) { +static bool tensor_buft_override_equal(const llama_model_tensor_buft_override& a, const llama_model_tensor_buft_override& b) { if (a.pattern != b.pattern) { // cString comparison that may be null if (a.pattern == nullptr || b.pattern == nullptr) { @@ -52,7 +52,7 @@ bool tensor_buft_override_equal(const llama_model_tensor_buft_override& a, const return true; } -bool vec_tensor_buft_override_equal(const std::vector& a, const std::vector& b) { 
+static bool vec_tensor_buft_override_equal(const std::vector& a, const std::vector& b) { if (a.size() != b.size()) { return false; } @@ -64,7 +64,7 @@ bool vec_tensor_buft_override_equal(const std::vector>& a, const std::vector>& b) { +static bool vec_vec_tensor_buft_override_equal(const std::vector>& a, const std::vector>& b) { if (a.size() != b.size()) { return false; } @@ -1148,7 +1148,7 @@ struct test { tensor_split_str += "/"; } } - for (int i = 0; i < tensor_buft_overrides.size()-1; i++) { + for (size_t i = 0; i < tensor_buft_overrides.size()-1; i++) { // Last element of tensor_buft_overrides is always a null pattern if (tensor_buft_overrides[i].pattern == nullptr) { tensor_buft_overrides_str += "none"; From d2cdd957974b45afea73ff29c45e3c52b1363de1 Mon Sep 17 00:00:00 2001 From: 4onen Date: Wed, 23 Apr 2025 15:19:09 -0700 Subject: [PATCH 5/5] llama-bench: Correct -ot corner cases (No -ot calls, leading and trailing empty -ot spans, etc.) --- examples/llama-bench/llama-bench.cpp | 63 +++++++++++++++++----------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index d5635be92342e..564a51bfd7b6c 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -248,7 +248,7 @@ static const cmd_params cmd_params_defaults = { /* no_kv_offload */ { false }, /* flash_attn */ { false }, /* tensor_split */ { std::vector(llama_max_devices(), 0.0f) }, - /* tensor_buft_overrides*/ {}, + /* tensor_buft_overrides*/ { std::vector{{nullptr,nullptr}} }, /* use_mmap */ { true }, /* embeddings */ { false }, /* numa */ GGML_NUMA_STRATEGY_DISABLED, @@ -307,7 +307,7 @@ static void print_usage(int /* argc */, char ** argv) { printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str()); printf(" -ts, --tensor-split (default: 0)\n"); - printf(" -ot --override-tensors =;... 
(default:disabled)\n"); + printf(" -ot --override-tensor <tensor name pattern>=<buffer type>;... (default: disabled)\n"); printf(" -r, --repetitions (default: %d)\n", cmd_params_defaults.reps); printf(" --prio <0|1|2|3> (default: %d)\n", cmd_params_defaults.prio); printf(" --delay <0...N> (seconds) (default: %d)\n", cmd_params_defaults.delay); @@ -618,18 +618,32 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { } } auto override_group_span_len = std::strcspn(value, ","); - while (override_group_span_len > 0) { + bool last_group = false; + do { + if (override_group_span_len == 0) { + // Adds an empty override-tensors for an empty span + params.tensor_buft_overrides.push_back({{}}); + if (value[override_group_span_len] == '\0') { + value = &value[override_group_span_len]; + last_group = true; + } else { + value = &value[override_group_span_len + 1]; + override_group_span_len = std::strcspn(value, ","); + } + continue; + } // Stamps null terminators into the argv // value for this option to avoid the // memory leak present in the implementation // over in arg.cpp. Acceptable because we // only parse these args once in this program.
auto override_group = value; - if (value[override_group_span_len] != '\0') { + if (value[override_group_span_len] == '\0') { + value = &value[override_group_span_len]; + last_group = true; + } else { value[override_group_span_len] = '\0'; value = &value[override_group_span_len + 1]; - } else { - value = &value[override_group_span_len]; } std::vector group_tensor_buft_overrides{}; auto override_span_len = std::strcspn(override_group, ";"); @@ -663,9 +677,10 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { if (invalid_param) { break; } + group_tensor_buft_overrides.push_back({nullptr,nullptr}); params.tensor_buft_overrides.push_back(group_tensor_buft_overrides); override_group_span_len = std::strcspn(value, ","); - } + } while (!last_group); } else if (arg == "-r" || arg == "--repetitions") { if (++i >= argc) { invalid_param = true; @@ -779,13 +794,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { params.poll = cmd_params_defaults.poll; } - // Attach terminators to options that requre them - for (auto &tensor_buft_override_list : params.tensor_buft_overrides) { - if (!tensor_buft_override_list.empty()) { - tensor_buft_override_list.push_back({nullptr, nullptr}); - } - } - return params; } @@ -1148,17 +1156,24 @@ struct test { tensor_split_str += "/"; } } - for (size_t i = 0; i < tensor_buft_overrides.size()-1; i++) { + if (tensor_buft_overrides.size() == 1) { // Last element of tensor_buft_overrides is always a null pattern - if (tensor_buft_overrides[i].pattern == nullptr) { - tensor_buft_overrides_str += "none"; - } else { - tensor_buft_overrides_str += tensor_buft_overrides[i].pattern; - tensor_buft_overrides_str += "="; - tensor_buft_overrides_str += ggml_backend_buft_name(tensor_buft_overrides[i].buft); - } - if (i + 2 < tensor_buft_overrides.size()) { - tensor_buft_overrides_str += ";"; + // so if it is only one element long, it must be a null pattern. 
+ GGML_ASSERT(tensor_buft_overrides[0].pattern == nullptr); + tensor_buft_overrides_str += "none"; + } else { + for (size_t i = 0; i < tensor_buft_overrides.size()-1; i++) { + // Last element of tensor_buft_overrides is always a null pattern + if (tensor_buft_overrides[i].pattern == nullptr) { + tensor_buft_overrides_str += "none"; + } else { + tensor_buft_overrides_str += tensor_buft_overrides[i].pattern; + tensor_buft_overrides_str += "="; + tensor_buft_overrides_str += ggml_backend_buft_name(tensor_buft_overrides[i].buft); + } + if (i + 2 < tensor_buft_overrides.size()) { + tensor_buft_overrides_str += ";"; + } } } std::vector values = { build_commit,