diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index ef77d9e2d3ff..7fc4776b0261 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -463,10 +463,6 @@ def schedule(self) -> SchedulerOutput: # always padded to the maximum length. If we support other # encoder-decoder models, this will need to be updated if we # want to only allocate what is needed. - assert ("whisper" - in self.vllm_config.model_config.model.lower()), ( - "Whisper is the only supported " - "encoder-decoder model.") num_encoder_tokens =\ self.scheduler_config.max_num_encoder_input_tokens else: