Skip to content

Commit 210d991

Browse files
authored
llama-bench : add support for the RPC backend (#7435)
1 parent 87bdf2a commit 210d991

File tree

3 files changed

+35
-2
lines changed

3 files changed

+35
-2
lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ struct cmd_params {
178178
std::vector<ggml_type> type_v;
179179
std::vector<int> n_threads;
180180
std::vector<int> n_gpu_layers;
181+
std::vector<std::string> rpc_servers;
181182
std::vector<llama_split_mode> split_mode;
182183
std::vector<int> main_gpu;
183184
std::vector<bool> no_kv_offload;
@@ -202,6 +203,7 @@ static const cmd_params cmd_params_defaults = {
202203
/* type_v */ {GGML_TYPE_F16},
203204
/* n_threads */ {cpu_get_num_math()},
204205
/* n_gpu_layers */ {99},
206+
/* rpc_servers */ {""},
205207
/* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
206208
/* main_gpu */ {0},
207209
/* no_kv_offload */ {false},
@@ -230,6 +232,7 @@ static void print_usage(int /* argc */, char ** argv) {
230232
printf(" -ctv, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
231233
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
232234
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
235+
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
233236
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
234237
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
235238
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
@@ -384,6 +387,12 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
384387
}
385388
auto p = split<int>(argv[i], split_delim);
386389
params.n_gpu_layers.insert(params.n_gpu_layers.end(), p.begin(), p.end());
390+
} else if (arg == "-rpc" || arg == "--rpc") {
391+
if (++i >= argc) {
392+
invalid_param = true;
393+
break;
394+
}
395+
params.rpc_servers.push_back(argv[i]);
387396
} else if (arg == "-sm" || arg == "--split-mode") {
388397
if (++i >= argc) {
389398
invalid_param = true;
@@ -519,6 +528,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
519528
if (params.type_k.empty()) { params.type_k = cmd_params_defaults.type_k; }
520529
if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; }
521530
if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; }
531+
if (params.rpc_servers.empty()) { params.rpc_servers = cmd_params_defaults.rpc_servers; }
522532
if (params.split_mode.empty()) { params.split_mode = cmd_params_defaults.split_mode; }
523533
if (params.main_gpu.empty()) { params.main_gpu = cmd_params_defaults.main_gpu; }
524534
if (params.no_kv_offload.empty()){ params.no_kv_offload = cmd_params_defaults.no_kv_offload; }
@@ -541,6 +551,7 @@ struct cmd_params_instance {
541551
ggml_type type_v;
542552
int n_threads;
543553
int n_gpu_layers;
554+
std::string rpc_servers;
544555
llama_split_mode split_mode;
545556
int main_gpu;
546557
bool no_kv_offload;
@@ -553,6 +564,9 @@ struct cmd_params_instance {
553564
llama_model_params mparams = llama_model_default_params();
554565

555566
mparams.n_gpu_layers = n_gpu_layers;
567+
if (!rpc_servers.empty()) {
568+
mparams.rpc_servers = rpc_servers.c_str();
569+
}
556570
mparams.split_mode = split_mode;
557571
mparams.main_gpu = main_gpu;
558572
mparams.tensor_split = tensor_split.data();
@@ -564,6 +578,7 @@ struct cmd_params_instance {
564578
bool equal_mparams(const cmd_params_instance & other) const {
565579
return model == other.model &&
566580
n_gpu_layers == other.n_gpu_layers &&
581+
rpc_servers == other.rpc_servers &&
567582
split_mode == other.split_mode &&
568583
main_gpu == other.main_gpu &&
569584
use_mmap == other.use_mmap &&
@@ -592,6 +607,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
592607
// this ordering minimizes the number of times that each model needs to be reloaded
593608
for (const auto & m : params.model)
594609
for (const auto & nl : params.n_gpu_layers)
610+
for (const auto & rpc : params.rpc_servers)
595611
for (const auto & sm : params.split_mode)
596612
for (const auto & mg : params.main_gpu)
597613
for (const auto & ts : params.tensor_split)
@@ -618,6 +634,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
618634
/* .type_v = */ tv,
619635
/* .n_threads = */ nt,
620636
/* .n_gpu_layers = */ nl,
637+
/* .rpc_servers = */ rpc,
621638
/* .split_mode = */ sm,
622639
/* .main_gpu = */ mg,
623640
/* .no_kv_offload= */ nkvo,
@@ -643,6 +660,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
643660
/* .type_v = */ tv,
644661
/* .n_threads = */ nt,
645662
/* .n_gpu_layers = */ nl,
663+
/* .rpc_servers = */ rpc,
646664
/* .split_mode = */ sm,
647665
/* .main_gpu = */ mg,
648666
/* .no_kv_offload= */ nkvo,
@@ -668,6 +686,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
668686
/* .type_v = */ tv,
669687
/* .n_threads = */ nt,
670688
/* .n_gpu_layers = */ nl,
689+
/* .rpc_servers = */ rpc,
671690
/* .split_mode = */ sm,
672691
/* .main_gpu = */ mg,
673692
/* .no_kv_offload= */ nkvo,
@@ -692,6 +711,7 @@ struct test {
692711
static const bool kompute;
693712
static const bool metal;
694713
static const bool sycl;
714+
static const bool rpc;
695715
static const bool gpu_blas;
696716
static const bool blas;
697717
static const std::string cpu_info;
@@ -790,6 +810,9 @@ struct test {
790810
if (sycl) {
791811
return GGML_SYCL_NAME;
792812
}
813+
if (rpc) {
814+
return "RPC";
815+
}
793816
if (gpu_blas) {
794817
return "GPU BLAS";
795818
}
@@ -803,7 +826,7 @@ struct test {
803826
static const std::vector<std::string> & get_fields() {
804827
static const std::vector<std::string> fields = {
805828
"build_commit", "build_number",
806-
"cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas",
829+
"cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas", "blas",
807830
"cpu_info", "gpu_info",
808831
"model_filename", "model_type", "model_size", "model_n_params",
809832
"n_batch", "n_ubatch",
@@ -859,7 +882,7 @@ struct test {
859882
std::vector<std::string> values = {
860883
build_commit, std::to_string(build_number),
861884
std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(vulkan),
862-
std::to_string(metal), std::to_string(sycl), std::to_string(gpu_blas), std::to_string(blas),
885+
std::to_string(metal), std::to_string(sycl), std::to_string(rpc), std::to_string(gpu_blas), std::to_string(blas),
863886
cpu_info, gpu_info,
864887
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
865888
std::to_string(n_batch), std::to_string(n_ubatch),
@@ -894,6 +917,7 @@ const bool test::metal = !!ggml_cpu_has_metal();
894917
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
895918
const bool test::blas = !!ggml_cpu_has_blas();
896919
const bool test::sycl = !!ggml_cpu_has_sycl();
920+
const bool test::rpc = !!ggml_cpu_has_rpc();
897921
const std::string test::cpu_info = get_cpu_info();
898922
const std::string test::gpu_info = get_gpu_info();
899923

ggml.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22872,6 +22872,14 @@ int ggml_cpu_has_sycl(void) {
2287222872
#endif
2287322873
}
2287422874

22875+
// Reports whether this build of ggml was compiled with the RPC backend.
// Returns 1 when GGML_USE_RPC was defined at compile time, 0 otherwise.
// Mirrors the other ggml_cpu_has_* feature probes (e.g. ggml_cpu_has_sycl above).
int ggml_cpu_has_rpc(void) {
22876+
#if defined(GGML_USE_RPC)
22877+
return 1;
22878+
#else
22879+
return 0;
22880+
#endif
22881+
}
22882+
2287522883
int ggml_cpu_has_gpublas(void) {
2287622884
return ggml_cpu_has_cuda() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
2287722885
ggml_cpu_has_sycl();

ggml.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2428,6 +2428,7 @@ extern "C" {
24282428
GGML_API int ggml_cpu_has_sse3 (void);
24292429
GGML_API int ggml_cpu_has_ssse3 (void);
24302430
GGML_API int ggml_cpu_has_sycl (void);
2431+
GGML_API int ggml_cpu_has_rpc (void);
24312432
GGML_API int ggml_cpu_has_vsx (void);
24322433
GGML_API int ggml_cpu_has_matmul_int8(void);
24332434

0 commit comments

Comments (0)