Commit 2e33b33

passthru rpc_servers params
wip
1 parent 0b5fa93 commit 2e33b33

File tree

1 file changed: +3 -0 lines changed

llama_cpp/llama.py

Lines changed: 3 additions & 0 deletions
@@ -70,6 +70,7 @@ def __init__(
         split_mode: int = llama_cpp.LLAMA_SPLIT_MODE_LAYER,
         main_gpu: int = 0,
         tensor_split: Optional[List[float]] = None,
+        rpc_servers: Optional[str] = None,
         vocab_only: bool = False,
         use_mmap: bool = True,
         use_mlock: bool = False,
@@ -148,6 +149,7 @@ def __init__(
             split_mode: How to split the model across GPUs. See llama_cpp.LLAMA_SPLIT_* for options.
             main_gpu: main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model. LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results. LLAMA_SPLIT_LAYER: ignored
             tensor_split: How split tensors should be distributed across GPUs. If None, the model is not split.
+            rpc_servers: Comma separated list of RPC servers to use for offloading
             vocab_only: Only load the vocabulary no weights.
             use_mmap: Use mmap if possible.
             use_mlock: Force the system to keep the model in RAM.
@@ -219,6 +221,7 @@ def __init__(
         ) # 0x7FFFFFFF is INT32 max, will be auto set to all layers
         self.model_params.split_mode = split_mode
         self.model_params.main_gpu = main_gpu
+        self.model_params.rpc_servers = rpc_servers
         self.tensor_split = tensor_split
         self._c_tensor_split = None
         if self.tensor_split is not None:
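For context, a minimal usage sketch of the new parameter (not part of this commit). The model path and server addresses are placeholders; 50052 is the default port of llama.cpp's rpc-server, and n_gpu_layers=-1 relies on the INT32-max behavior noted in the diff to offload all layers.

from llama_cpp import Llama

# Hypothetical example: offload model layers to two RPC backends.
# Path and addresses are placeholders, not taken from the commit.
llm = Llama(
    model_path="./models/example.gguf",
    n_gpu_layers=-1,  # auto-set to all layers (0x7FFFFFFF), per the diff comment
    rpc_servers="192.168.1.10:50052,192.168.1.11:50052",  # comma separated host:port list
)

One caveat worth noting: llama.cpp's llama_model_params.rpc_servers is a C string, so if the bindings declare it as ctypes.c_char_p, the Python str assigned in the last hunk would likely need an .encode("utf-8") first; the commit passes the str through unchanged.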
