Skip to content

Commit 8f9c28f

Browse files
authored
[Bugfix] Fix CustomAllreduce nvlink topology detection (#3974)
[Bugfix] Fix CustomAllreduce pcie nvlink topology detection (#3974) (#4159)
1 parent cd2f63f commit 8f9c28f

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

vllm/distributed/device_communicators/custom_all_reduce.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,10 @@ def _is_full_nvlink(rank, world_size):
     for i in range(world_size):
         if i != rank:
             try:
-                link_state = pynvml.nvmlDeviceGetNvLinkState(handle, i)
-                if not link_state:
+                peer_handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+                p2p_status = pynvml.nvmlDeviceGetP2PStatus(
+                    handle, peer_handle, pynvml.NVML_P2P_CAPS_INDEX_NVLINK)
+                if p2p_status != pynvml.NVML_P2P_STATUS_OK:
                     return False
             except pynvml.NVMLError as error:
                 logger.info(

0 commit comments

Comments
 (0)