@@ -178,6 +178,7 @@ struct cmd_params {
178
178
std::vector<ggml_type> type_v;
179
179
std::vector<int > n_threads;
180
180
std::vector<int > n_gpu_layers;
181
+ std::vector<std::string> rpc_servers;
181
182
std::vector<llama_split_mode> split_mode;
182
183
std::vector<int > main_gpu;
183
184
std::vector<bool > no_kv_offload;
@@ -202,6 +203,7 @@ static const cmd_params cmd_params_defaults = {
202
203
/* type_v */ {GGML_TYPE_F16},
203
204
/* n_threads */ {cpu_get_num_math ()},
204
205
/* n_gpu_layers */ {99 },
206
+ /* rpc_servers */ {" " },
205
207
/* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
206
208
/* main_gpu */ {0 },
207
209
/* no_kv_offload */ {false },
@@ -230,6 +232,7 @@ static void print_usage(int /* argc */, char ** argv) {
230
232
printf (" -ctv, --cache-type-v <t> (default: %s)\n " , join (transform_to_str (cmd_params_defaults.type_v , ggml_type_name), " ," ).c_str ());
231
233
printf (" -t, --threads <n> (default: %s)\n " , join (cmd_params_defaults.n_threads , " ," ).c_str ());
232
234
printf (" -ngl, --n-gpu-layers <n> (default: %s)\n " , join (cmd_params_defaults.n_gpu_layers , " ," ).c_str ());
235
+ printf (" -rpc, --rpc <rpc_servers> (default: %s)\n " , join (cmd_params_defaults.rpc_servers , " ," ).c_str ());
233
236
printf (" -sm, --split-mode <none|layer|row> (default: %s)\n " , join (transform_to_str (cmd_params_defaults.split_mode , split_mode_str), " ," ).c_str ());
234
237
printf (" -mg, --main-gpu <i> (default: %s)\n " , join (cmd_params_defaults.main_gpu , " ," ).c_str ());
235
238
printf (" -nkvo, --no-kv-offload <0|1> (default: %s)\n " , join (cmd_params_defaults.no_kv_offload , " ," ).c_str ());
@@ -384,6 +387,12 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
384
387
}
385
388
auto p = split<int >(argv[i], split_delim);
386
389
params.n_gpu_layers .insert (params.n_gpu_layers .end (), p.begin (), p.end ());
390
+ } else if (arg == " -rpc" || arg == " --rpc" ) {
391
+ if (++i >= argc) {
392
+ invalid_param = true ;
393
+ break ;
394
+ }
395
+ params.rpc_servers .push_back (argv[i]);
387
396
} else if (arg == " -sm" || arg == " --split-mode" ) {
388
397
if (++i >= argc) {
389
398
invalid_param = true ;
@@ -519,6 +528,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
519
528
if (params.type_k .empty ()) { params.type_k = cmd_params_defaults.type_k ; }
520
529
if (params.type_v .empty ()) { params.type_v = cmd_params_defaults.type_v ; }
521
530
if (params.n_gpu_layers .empty ()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers ; }
531
+ if (params.rpc_servers .empty ()) { params.rpc_servers = cmd_params_defaults.rpc_servers ; }
522
532
if (params.split_mode .empty ()) { params.split_mode = cmd_params_defaults.split_mode ; }
523
533
if (params.main_gpu .empty ()) { params.main_gpu = cmd_params_defaults.main_gpu ; }
524
534
if (params.no_kv_offload .empty ()){ params.no_kv_offload = cmd_params_defaults.no_kv_offload ; }
@@ -541,6 +551,7 @@ struct cmd_params_instance {
541
551
ggml_type type_v;
542
552
int n_threads;
543
553
int n_gpu_layers;
554
+ std::string rpc_servers;
544
555
llama_split_mode split_mode;
545
556
int main_gpu;
546
557
bool no_kv_offload;
@@ -553,6 +564,9 @@ struct cmd_params_instance {
553
564
llama_model_params mparams = llama_model_default_params ();
554
565
555
566
mparams.n_gpu_layers = n_gpu_layers;
567
+ if (!rpc_servers.empty ()) {
568
+ mparams.rpc_servers = rpc_servers.c_str ();
569
+ }
556
570
mparams.split_mode = split_mode;
557
571
mparams.main_gpu = main_gpu;
558
572
mparams.tensor_split = tensor_split.data ();
@@ -564,6 +578,7 @@ struct cmd_params_instance {
564
578
bool equal_mparams (const cmd_params_instance & other) const {
565
579
return model == other.model &&
566
580
n_gpu_layers == other.n_gpu_layers &&
581
+ rpc_servers == other.rpc_servers &&
567
582
split_mode == other.split_mode &&
568
583
main_gpu == other.main_gpu &&
569
584
use_mmap == other.use_mmap &&
@@ -592,6 +607,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
592
607
// this ordering minimizes the number of times that each model needs to be reloaded
593
608
for (const auto & m : params.model )
594
609
for (const auto & nl : params.n_gpu_layers )
610
+ for (const auto & rpc : params.rpc_servers )
595
611
for (const auto & sm : params.split_mode )
596
612
for (const auto & mg : params.main_gpu )
597
613
for (const auto & ts : params.tensor_split )
@@ -618,6 +634,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
618
634
/* .type_v = */ tv,
619
635
/* .n_threads = */ nt,
620
636
/* .n_gpu_layers = */ nl,
637
+ /* .rpc_servers = */ rpc,
621
638
/* .split_mode = */ sm,
622
639
/* .main_gpu = */ mg,
623
640
/* .no_kv_offload= */ nkvo,
@@ -643,6 +660,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
643
660
/* .type_v = */ tv,
644
661
/* .n_threads = */ nt,
645
662
/* .n_gpu_layers = */ nl,
663
+ /* .rpc_servers = */ rpc,
646
664
/* .split_mode = */ sm,
647
665
/* .main_gpu = */ mg,
648
666
/* .no_kv_offload= */ nkvo,
@@ -668,6 +686,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
668
686
/* .type_v = */ tv,
669
687
/* .n_threads = */ nt,
670
688
/* .n_gpu_layers = */ nl,
689
+ /* .rpc_servers = */ rpc,
671
690
/* .split_mode = */ sm,
672
691
/* .main_gpu = */ mg,
673
692
/* .no_kv_offload= */ nkvo,
@@ -692,6 +711,7 @@ struct test {
692
711
static const bool kompute;
693
712
static const bool metal;
694
713
static const bool sycl;
714
+ static const bool rpc;
695
715
static const bool gpu_blas;
696
716
static const bool blas;
697
717
static const std::string cpu_info;
@@ -790,6 +810,9 @@ struct test {
790
810
if (sycl) {
791
811
return GGML_SYCL_NAME;
792
812
}
813
+ if (rpc) {
814
+ return " RPC" ;
815
+ }
793
816
if (gpu_blas) {
794
817
return " GPU BLAS" ;
795
818
}
@@ -803,7 +826,7 @@ struct test {
803
826
static const std::vector<std::string> & get_fields () {
804
827
static const std::vector<std::string> fields = {
805
828
" build_commit" , " build_number" ,
806
- " cuda" , " opencl" , " vulkan" , " kompute" , " metal" , " sycl" , " gpu_blas" , " blas" ,
829
+ " cuda" , " opencl" , " vulkan" , " kompute" , " metal" , " sycl" , " rpc" , " gpu_blas" , " blas" ,
807
830
" cpu_info" , " gpu_info" ,
808
831
" model_filename" , " model_type" , " model_size" , " model_n_params" ,
809
832
" n_batch" , " n_ubatch" ,
@@ -859,7 +882,7 @@ struct test {
859
882
std::vector<std::string> values = {
860
883
build_commit, std::to_string (build_number),
861
884
std::to_string (cuda), std::to_string (opencl), std::to_string (vulkan), std::to_string (kompute),
862
- std::to_string (metal), std::to_string (sycl), std::to_string (gpu_blas), std::to_string (blas),
885
+ std::to_string (metal), std::to_string (sycl), std::to_string (rpc), std::to_string ( gpu_blas), std::to_string (blas),
863
886
cpu_info, gpu_info,
864
887
model_filename, model_type, std::to_string (model_size), std::to_string (model_n_params),
865
888
std::to_string (n_batch), std::to_string (n_ubatch),
@@ -894,6 +917,7 @@ const bool test::metal = !!ggml_cpu_has_metal();
894
917
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
895
918
const bool test::blas = !!ggml_cpu_has_blas();
896
919
const bool test::sycl = !!ggml_cpu_has_sycl();
920
+ const bool test::rpc = !!ggml_cpu_has_rpc();
897
921
const std::string test::cpu_info = get_cpu_info();
898
922
const std::string test::gpu_info = get_gpu_info();
899
923
0 commit comments