Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vllm_ascend/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,9 @@ def register():


def register_model():
    """Register vllm-ascend model implementations with vLLM.

    Before touching any model module, conditionally applies the local
    TritonPlaceholder patch: vLLM 0.8.5 / 0.8.5.post1 predate vLLM's own
    ``TritonPlaceholder`` (see vllm-project/vllm#17446), so the patch is
    only imported for other versions where the placeholder exists but is
    broken on NPU.
    """
    # TODO: fixme when TritonPlaceholder fixed
    from vllm_ascend.utils import vllm_version_is
    if not (vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1")):
        # Importing the module applies the patch as a side effect.
        import vllm_ascend.patch.worker.patch_main.patch_tritonplaceholder  # noqa
    # Local import deliberately shadows this function's name inside its own
    # scope; the call below invokes the models-package registrar.
    from .models import register_model
    register_model()
13 changes: 12 additions & 1 deletion vllm_ascend/patch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,15 @@
# - https://github.com/vllm-project/vllm-ascend/pull/395
# Future Plan:
# Revert it when the related pr is merged in vllm and vllm-ascend.
#
#
# ** File: worker/patch_main/patch_tritonplaceholder.py **
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 1. `triton` Module
# Why:
# Triton is not supported on npu currently, importing triton will break vllm-ascend
# How:
# ditto
# Related PR (if no, explain why): vllm hasn't supported it yet;
# TritonPlaceholder is only available in vllm > 0.8.5.post1
# Future Plan:
# https://github.com/vllm-project/vllm/pull/17446
2 changes: 1 addition & 1 deletion vllm_ascend/patch/platform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from vllm_ascend.utils import vllm_version_is

# Import specific patches for different versions
if vllm_version_is("0.8.5"):
if vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1"):
from vllm_ascend.patch.platform import patch_0_8_5 # noqa: F401
from vllm_ascend.patch.platform import patch_common # noqa: F401
else:
Expand Down
4 changes: 2 additions & 2 deletions vllm_ascend/patch/worker/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
from vllm_ascend.utils import vllm_version_is

# Import specific patches for different versions
if vllm_version_is("0.8.5"):
if vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1"):
from vllm_ascend.patch.worker import patch_0_8_5 # noqa: F401
from vllm_ascend.patch.worker import patch_common # noqa: F401
else:
from vllm_ascend.patch.worker import patch_main # noqa: F401 # isort:skip
Copy link
Collaborator Author

@Yikun Yikun May 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Try move patch_main before patch_common to resolve:

https://github.com/vllm-project/vllm-ascend/actions/runs/14837426407/job/41651814910

ERROR 05-05 13:28:54 [registry.py:356] Traceback (most recent call last):
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/model_executor/models/registry.py", line 354, in _try_inspect_model_cls
ERROR 05-05 13:28:54 [registry.py:356]     return model.inspect_model_cls()
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/model_executor/models/registry.py", line 325, in inspect_model_cls
ERROR 05-05 13:28:54 [registry.py:356]     return _run_in_subprocess(
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/model_executor/models/registry.py", line 590, in _run_in_subprocess
ERROR 05-05 13:28:54 [registry.py:356]     raise RuntimeError(f"Error raised in subprocess:\n"
ERROR 05-05 13:28:54 [registry.py:356] RuntimeError: Error raised in subprocess:
ERROR 05-05 13:28:54 [registry.py:356] /usr/local/python3.10/lib/python3.10/runpy.py:126: RuntimeWarning: 'vllm.model_executor.models.registry' found in sys.modules after import of package 'vllm.model_executor.models', but prior to execution of 'vllm.model_executor.models.registry'; this may result in unpredictable behaviour
ERROR 05-05 13:28:54 [registry.py:356]   warn(RuntimeWarning(msg))
ERROR 05-05 13:28:54 [registry.py:356] Traceback (most recent call last):
ERROR 05-05 13:28:54 [registry.py:356]   File "/usr/local/python3.10/lib/python3.10/runpy.py", line 196, in _run_module_as_main
ERROR 05-05 13:28:54 [registry.py:356]     return _run_code(code, main_globals, None,
ERROR 05-05 13:28:54 [registry.py:356]   File "/usr/local/python3.10/lib/python3.10/runpy.py", line 86, in _run_code
ERROR 05-05 13:28:54 [registry.py:356]     exec(code, run_globals)
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/model_executor/models/registry.py", line 611, in <module>
ERROR 05-05 13:28:54 [registry.py:356]     _run()
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/model_executor/models/registry.py", line 600, in _run
ERROR 05-05 13:28:54 [registry.py:356]     load_general_plugins()
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/plugins/__init__.py", line 82, in load_general_plugins
ERROR 05-05 13:28:54 [registry.py:356]     func()
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm_ascend/__init__.py", line 29, in register_model
ERROR 05-05 13:28:54 [registry.py:356]     import vllm_ascend.patch.worker.patch_main.patch_tritonplaceholder  # noqa
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm_ascend/patch/worker/__init__.py", line 25, in <module>
ERROR 05-05 13:28:54 [registry.py:356]     from vllm_ascend.patch.worker import patch_common  # noqa: F401
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm_ascend/patch/worker/patch_common/__init__.py", line 19, in <module>
ERROR 05-05 13:28:54 [registry.py:356]     import vllm_ascend.patch.worker.patch_common.patch_minicpm  # noqa
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm_ascend/patch/worker/patch_common/patch_minicpm.py", line 19, in <module>
ERROR 05-05 13:28:54 [registry.py:356]     from vllm.model_executor.models.minicpm import MiniCPMAttention
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/model_executor/models/minicpm.py", line 39, in <module>
ERROR 05-05 13:28:54 [registry.py:356]     from vllm.model_executor.layers.fused_moe import fused_moe
ERROR 05-05 13:28:54 [registry.py:356]   File "/__w/vllm-ascend/vllm-ascend/vllm-empty/vllm/model_executor/layers/fused_moe/fused_moe.py", line 9, in <module>
ERROR 05-05 13:28:54 [registry.py:356]     import triton
ERROR 05-05 13:28:54 [registry.py:356] ModuleNotFoundError: No module named 'triton'
ERROR 05-05 13:28:54 [registry.py:356] 

from vllm_ascend.patch.worker import patch_common # noqa: F401
from vllm_ascend.patch.worker import patch_main # noqa: F401
3 changes: 2 additions & 1 deletion vllm_ascend/patch/worker/patch_main/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
import vllm_ascend.patch.worker.patch_main.patch_tritonplaceholder # noqa
71 changes: 71 additions & 0 deletions vllm_ascend/patch/worker/patch_main/patch_tritonplaceholder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# Copyright 2023 The vLLM team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
# Adapted from vllm/triton_utils/importing.py
#

import importlib
import sys
import types
from importlib.util import find_spec

from vllm.logger import logger

# True when a real Triton distribution is importable.
# NOTE(review): "pytorch-triton-xpu" contains hyphens, which are not valid in
# an importable module name, so that find_spec call can never return a spec —
# confirm intent (the inline "Not compatible" comment suggests this is known).
HAS_TRITON = (
    find_spec("triton") is not None
    or find_spec("pytorch-triton-xpu") is not None  # Not compatible
)

if not HAS_TRITON:
    logger.info("Triton not installed or not compatible; certain GPU-related"
                " functions will not be available.")

    class TritonPlaceholder(types.ModuleType):
        """Stand-in for the ``triton`` module when Triton is unavailable.

        Provides no-op versions of the decorators commonly used at import
        time (``jit``, ``autotune``, ``heuristics``) plus a placeholder
        ``triton.language`` submodule, so modules that merely *import*
        triton keep loading on NPU.
        """

        def __init__(self):
            super().__init__("triton")
            self.jit = self._dummy_decorator("jit")
            self.autotune = self._dummy_decorator("autotune")
            self.heuristics = self._dummy_decorator("heuristics")
            self.language = TritonLanguagePlaceholder()
            # A real ModuleSpec keeps importlib machinery (e.g. find_spec,
            # reload checks) from choking on the fake module.
            self.__spec__ = importlib.machinery.ModuleSpec(
                name="triton", loader=None, origin="placeholder")
            logger.warning_once(
                "Triton is not installed. Using dummy decorators. "
                "Install it via `pip install triton` to enable kernel"
                " compilation.")

        def _dummy_decorator(self, name):
            """Return a decorator that works bare or with kwargs and
            leaves the decorated function unchanged."""

            def decorator(func=None, **kwargs):
                if func is None:
                    # Called as @triton.jit(...) with arguments.
                    return lambda f: f
                # Called as bare @triton.jit.
                return func

            return decorator

    class TritonLanguagePlaceholder(types.ModuleType):
        """Stand-in for ``triton.language`` with the attributes kernels
        reference in annotations (``constexpr``, ``dtype``)."""

        def __init__(self):
            super().__init__("triton.language")
            self.constexpr = None
            self.dtype = None

    # Install the placeholders so any later `import triton` resolves to them.
    sys.modules['triton'] = TritonPlaceholder()
    sys.modules['triton.language'] = TritonLanguagePlaceholder()

    # NOTE(review): always true here — the placeholder was assigned two lines
    # above; kept to mirror the upstream code this was adapted from.
    if 'triton' in sys.modules:
        logger.info("Triton module has been replaced with a placeholder.")