Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions vllm/model_executor/layers/quantization/utils/fp8_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,13 @@ def rocm_aiter_gemm_w8a8_blockscale_fake(
fake_impl=rocm_aiter_gemm_w8a8_blockscale_fake,
dispatch_key=current_platform.dispatch_key,
)
if (envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_LINEAR
and current_platform.is_fp8_fnuz()):

import aiter as rocm_aiter
from aiter import get_hip_quant

aiter_per1x128_quant = get_hip_quant(rocm_aiter.QuantType.per_1x128)


def dispatch_w8a8_blockscale_func(
Expand Down Expand Up @@ -178,8 +185,12 @@ def apply_w8a8_block_fp8_linear(
block_size, input.dtype)

else:
q_input, x_scale = per_token_group_quant_fp8(
input_2d, block_size[1], column_major_scales=use_cutlass)
if use_aiter_and_is_supported:
q_input, x_scale = aiter_per1x128_quant(
input_2d.contiguous(), quant_dtype=rocm_aiter.dtypes.fp8)
Comment on lines +188 to +190
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

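The call to `aiter_per1x128_quant` is not guarded by a try-except block, which can lead to a crash if the `aiter` module was not loaded: the name is only bound by the conditional module-level import, so when that import did not run, the call raises a `NameError`. Wrap this call in a try-except block so the failure is reported as a clear import error.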

try:
    q_input, x_scale = aiter_per1x128_quant(
        input_2d.contiguous(), quant_dtype=rocm_aiter.dtypes.fp8)
except NameError as e:
    raise ImportError("AITER is not properly installed or configured.") from e

else:
q_input, x_scale = per_token_group_quant_fp8(
input_2d, block_size[1], column_major_scales=use_cutlass)

output = w8a8_blockscale_func(q_input, weight, x_scale, weight_scale,
block_size, input.dtype)
Expand Down