[BugFix][Core] Fix BlockManagerV2 when Encoder Input is None (#9103)

sroy745 · web-flow · commit c8f26bb63694 · 2024-10-07T03:52:42.000Z
diff --git a/vllm/core/block/block_table.py b/vllm/core/block/block_table.py
@@ -220,7 +220,6 @@ def free(self) -> None:
         occupied by each block. After freeing all the blocks, the `_blocks` list
         is set to `None`.
         """
-        assert self._is_allocated
         for block in self.blocks:
             self._allocator.free(block)
         self._blocks.reset()
@@ -239,7 +238,6 @@ def physical_block_ids(self) -> List[int]:
             List[int]: A list of physical block indices for the blocks in the
                 BlockTable.
         """
-        assert self._is_allocated
         return self._blocks.ids()
 
     def get_unseen_token_ids(self, sequence_token_ids: List[int]) -> List[int]:
diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py
@@ -151,7 +151,9 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable:
             block_allocator=self.block_allocator,
             max_block_sliding_window=self.max_block_sliding_window,
         )
-        block_table.allocate(seq.get_token_ids())
+        if seq.get_token_ids():
+            # Add blocks to the block table only if the sequence is non empty.
+            block_table.allocate(seq.get_token_ids())
 
         return block_table
 
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -903,11 +903,6 @@ def create_engine_config(self) -> EngineConfig:
                     "--enable-prefix-caching is currently not "
                     "supported for multimodal models and has been disabled.")
             self.enable_prefix_caching = False
-        if model_config.is_encoder_decoder_model:
-            logger.warning(
-                "Block Manager v2 does not support encoder-decoder models"
-                " currently. Using Block Manager v1 as fallback.")
-            self.use_v2_block_manager = False
 
         cache_config = CacheConfig(
             block_size=self.block_size if self.device != "neuron" else

Original file line number	Diff line number	Diff line change
`@@ -151,7 +151,9 @@ def _allocate_sequence(self, seq: Sequence) -> BlockTable:`
`151`	`151`	`block_allocator=self.block_allocator,`
`152`	`152`	`max_block_sliding_window=self.max_block_sliding_window,`
`153`	`153`	`)`
`154`		`- block_table.allocate(seq.get_token_ids())`
	`154`	`+ if seq.get_token_ids():`
	`155`	`+ # Add blocks to the block table only if the sequence is non empty.`
	`156`	`+ block_table.allocate(seq.get_token_ids())`
`155`	`157`
`156`	`158`	`return block_table`
`157`	`159`