vllm/config.py: 3 additions & 0 deletions
@@ -1092,6 +1092,9 @@ def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
"Due to limitations of the custom LoRA CUDA kernel, "
"max_num_batched_tokens must be <= 65528 when "
"LoRA is enabled.")
if scheduler_config.chunked_prefill_enabled:
raise ValueError(
"Lora is not supported with chunked prefill yet.")


@dataclass
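For context, a minimal sketch of how this new guard behaves. The method name `verify_with_scheduler_config` and the attributes `max_num_batched_tokens` and `chunked_prefill_enabled` come from the diff above; the stand-in dataclasses below are simplified assumptions, not vLLM's real `SchedulerConfig`/`LoRAConfig` (which carry many more fields):

```python
# Stand-in sketch: illustrates the validation path added in this commit.
# These classes are simplified assumptions, not vLLM's actual config classes.
from dataclasses import dataclass


@dataclass
class SchedulerConfig:
    max_num_batched_tokens: int = 4096
    chunked_prefill_enabled: bool = False


@dataclass
class LoRAConfig:
    def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
        # Pre-existing check: the custom LoRA CUDA kernel caps batch size.
        if scheduler_config.max_num_batched_tokens > 65528:
            raise ValueError(
                "Due to limitations of the custom LoRA CUDA kernel, "
                "max_num_batched_tokens must be <= 65528 when "
                "LoRA is enabled.")
        # New check from this commit: reject LoRA + chunked prefill.
        if scheduler_config.chunked_prefill_enabled:
            raise ValueError(
                "LoRA is not supported with chunked prefill yet.")


# The guard fires as soon as both features are requested together:
lora_config = LoRAConfig()
try:
    lora_config.verify_with_scheduler_config(
        SchedulerConfig(chunked_prefill_enabled=True))
except ValueError as e:
    print(e)  # -> LoRA is not supported with chunked prefill yet.
```

Because the check lives in config verification rather than the scheduler itself, the incompatible combination fails fast at engine construction time instead of producing incorrect behavior at runtime.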