Skip to content

Commit 681c8ab

Browse files
thxCode authored and NeoZhangJianyu committed
rpc: fix register position (ggml-org#11424)
Signed-off-by: thxCode <[email protected]>
1 parent c17f246 commit 681c8ab

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

src/llama-model.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,10 +1303,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
13031303
const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
13041304
auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
13051305
if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
1306+
LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(cpu_dev));
13061307
return {cpu_dev, &pimpl->cpu_buft_list};
13071308
}
13081309
const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
13091310
auto * dev = devices.at(layer_gpu);
1311+
LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(dev));
13101312
return {dev, &pimpl->gpu_buft_list.at(dev)};
13111313
};
13121314

src/llama.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9409,6 +9409,7 @@ static struct llama_model * llama_model_load_from_file_impl(
94099409
model->devices.push_back(*dev);
94109410
}
94119411
} else {
9412+
std::vector<ggml_backend_dev_t> rpc_servers;
94129413
// use all available devices
94139414
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
94149415
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
@@ -9419,10 +9420,19 @@ static struct llama_model * llama_model_load_from_file_impl(
94199420
break;
94209421

94219422
case GGML_BACKEND_DEVICE_TYPE_GPU:
9422-
model->devices.push_back(dev);
9423+
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
9424+
if (ggml_backend_reg_name(reg) == std::string("RPC")) {
9425+
rpc_servers.push_back(dev);
9426+
} else {
9427+
model->devices.push_back(dev);
9428+
}
94239429
break;
94249430
}
94259431
}
9432+
// add RPC servers at the front of the list
9433+
if (!rpc_servers.empty()) {
9434+
model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
9435+
}
94269436
}
94279437

94289438
// if using single GPU mode, remove all except the main GPU

0 commit comments

Comments
 (0)