
Commit adbb44f

jeejeelee authored and DamonFool committed
[Bugfix] Make the device profiler include LoRA memory. (vllm-project#14469)
Signed-off-by: Jee Jee Li <[email protected]>
1 parent 8fdd3a2 commit adbb44f

File tree

1 file changed: +29 -29 lines changed


vllm/worker/model_runner.py

Lines changed: 29 additions & 29 deletions
@@ -1111,41 +1111,41 @@ def load_model(self) -> None:
         with DeviceMemoryProfiler(self.device) as m:
             time_before_load = time.perf_counter()
             self.model = get_model(vllm_config=self.vllm_config)
+            if self.lora_config:
+                assert supports_lora(
+                    self.model
+                ), f"{self.model.__class__.__name__} does not support LoRA yet."
+
+                if supports_multimodal(self.model):
+                    logger.warning(
+                        "Regarding multimodal models, vLLM currently "
+                        "only supports adding LoRA to language model.")
+                # It's necessary to distinguish between the
+                # max_position_embeddings of VLMs and LLMs.
+                if hasattr(self.model.config, "max_position_embeddings"):
+                    max_pos_embeddings = (
+                        self.model.config.max_position_embeddings)
+                else:
+                    max_pos_embeddings = (
+                        self.model.config.text_config.max_position_embeddings)
+
+                self.lora_manager = LRUCacheWorkerLoRAManager(
+                    self.scheduler_config.max_num_seqs,
+                    self.scheduler_config.max_num_batched_tokens,
+                    self.vocab_size,
+                    self.lora_config,
+                    self.device,
+                    self.model.embedding_modules,
+                    self.model.embedding_padding_modules,
+                    max_position_embeddings=max_pos_embeddings,
+                )
+                self.model = self.lora_manager.create_lora_manager(self.model)
             time_after_load = time.perf_counter()
 
         self.model_memory_usage = m.consumed_memory
         logger.info("Model loading took %.4f GB and %.6f seconds",
                     self.model_memory_usage / float(2**30),
                     time_after_load - time_before_load)
-
-        if self.lora_config:
-            assert supports_lora(
-                self.model
-            ), f"{self.model.__class__.__name__} does not support LoRA yet."
-
-            if supports_multimodal(self.model):
-                logger.warning("Regarding multimodal models, vLLM currently "
-                               "only supports adding LoRA to language model.")
-            # It's necessary to distinguish between the max_position_embeddings
-            # of VLMs and LLMs.
-            if hasattr(self.model.config, "max_position_embeddings"):
-                max_pos_embeddings = self.model.config.max_position_embeddings
-            else:
-                max_pos_embeddings = (
-                    self.model.config.text_config.max_position_embeddings)
-
-            self.lora_manager = LRUCacheWorkerLoRAManager(
-                self.scheduler_config.max_num_seqs,
-                self.scheduler_config.max_num_batched_tokens,
-                self.vocab_size,
-                self.lora_config,
-                self.device,
-                self.model.embedding_modules,
-                self.model.embedding_padding_modules,
-                max_position_embeddings=max_pos_embeddings,
-            )
-            self.model = self.lora_manager.create_lora_manager(self.model)
-
         if self.prompt_adapter_config:
             self.prompt_adapter_manager = LRUCacheWorkerPromptAdapterManager(
                 self.scheduler_config.max_num_seqs,
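
The point of the move is visible in the control flow: DeviceMemoryProfiler records consumed_memory when its with-block exits, so any LoRA weights allocated after the block were previously missing from model_memory_usage. A minimal sketch of the same pattern, using torch.cuda.memory_allocated as a stand-in for vLLM's profiler (the load_model_and_lora helper and the tensor shapes are hypothetical, for illustration only):

    import torch

    def load_model_and_lora(device: torch.device) -> int:
        """Return device memory (bytes) consumed while loading, LoRA included."""
        before = torch.cuda.memory_allocated(device)

        # Base model weights (toy size, illustration only).
        model = torch.empty(4096, 4096, device=device)

        # LoRA adapter weights. After this fix they are allocated *inside*
        # the measured region, so they count toward the reported usage.
        lora_a = torch.empty(4096, 16, device=device)
        lora_b = torch.empty(16, 4096, device=device)

        after = torch.cuda.memory_allocated(device)
        return after - before

Before this commit, the LoRA allocations happened only after the equivalent of `after` had been read, so the logged "Model loading took ..." figure understated what the device was actually holding.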
