From 696a43c94cb088a000ec6fea4dad6729a17956c7 Mon Sep 17 00:00:00 2001
From: sang <rkooo567@gmail.com>
Date: Fri, 14 Jun 2024 03:56:13 -0700
Subject: [PATCH 1/2] Raise an error when LoRA is used with chunked prefill

---
 vllm/config.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/config.py b/vllm/config.py
index d9e4a619ee01..8ae23b551144 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1092,6 +1092,9 @@ def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
                 "Due to limitations of the custom LoRA CUDA kernel, "
                 "max_num_batched_tokens must be <= 65528 when "
                 "LoRA is enabled.")
+        if scheduler_config.chunked_prefill_enabled:
+            raise ValueError(
+                "Lora is not supported with chunked prefill yet.")
 
 
 @dataclass

From 7c9f3a4d1c86909af04dd8b8e280f1940a97f01f Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Sat, 15 Jun 2024 21:13:43 +0800
Subject: [PATCH 2/2] Fix linter error and use proper capitalization

---
 vllm/config.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/config.py b/vllm/config.py
index 8ae23b551144..54f36e1d6678 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1093,8 +1093,7 @@ def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
                 "max_num_batched_tokens must be <= 65528 when "
                 "LoRA is enabled.")
         if scheduler_config.chunked_prefill_enabled:
-            raise ValueError(
-                "Lora is not supported with chunked prefill yet.")
+            raise ValueError("LoRA is not supported with chunked prefill yet.")
 
 
 @dataclass