 import os
 import random
 import tempfile
+from typing import Union
 from unittest.mock import patch

+import pytest
+
+import vllm.envs as envs
 from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                          ModelConfig, ParallelConfig, SchedulerConfig,
                          VllmConfig)
 from vllm.lora.models import LoRAMapping
 from vllm.lora.request import LoRARequest
+from vllm.v1.worker.gpu_worker import Worker as V1Worker
 from vllm.worker.worker import Worker


+@pytest.fixture(autouse=True)
+def v1(run_with_both_engines_lora):
+    # Simple autouse wrapper to run both engines for each test.
+    # This can be promoted up to conftest.py to run for every
+    # test in a package.
+    pass
+
+
 @patch.dict(os.environ, {"RANK": "0"})
 def test_worker_apply_lora(sql_lora_files):
+
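+    # Small helper so the same test body can drive both the v0 and
+    # v1 worker implementations.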
+    def set_active_loras(worker: Union[Worker, V1Worker],
+                         lora_requests: list[LoRARequest]):
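+        # An empty LoRAMapping is enough here: the test only checks
+        # which adapters get registered, not token-to-adapter routing.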
+        lora_mapping = LoRAMapping([], [])
+        if isinstance(worker, Worker):
+            # v0 case
+            worker.model_runner.set_active_loras(lora_requests, lora_mapping)
+        else:
+            # v1 case
+            worker.model_runner.lora_manager.set_active_adapters(
+                lora_requests, lora_mapping)
+
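+    # Choose the worker implementation from the VLLM_USE_V1 env flag.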
+    worker_cls = V1Worker if envs.VLLM_USE_V1 else Worker
+
     vllm_config = VllmConfig(
         model_config=ModelConfig(
             "meta-llama/Llama-2-7b-hf",
@@ -40,24 +67,25 @@ def test_worker_apply_lora(sql_lora_files):
         lora_config=LoRAConfig(max_lora_rank=8, max_cpu_loras=32,
                                max_loras=32),
     )
-    worker = Worker(
+    worker = worker_cls(
         vllm_config=vllm_config,
         local_rank=0,
         rank=0,
         distributed_init_method=f"file://{tempfile.mkstemp()[1]}",
     )
+
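+    # Bring up the device and load the base model before touching LoRAs.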
     worker.init_device()
     worker.load_model()

-    worker.model_runner.set_active_loras([], LoRAMapping([], []))
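+    # Nothing activated yet, so no adapters should be registered.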
+    set_active_loras(worker, [])
     assert worker.list_loras() == set()

     n_loras = 32
     lora_requests = [
         LoRARequest(str(i + 1), i + 1, sql_lora_files) for i in range(n_loras)
     ]

-    worker.model_runner.set_active_loras(lora_requests, LoRAMapping([], []))
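+    # All 32 requests fit within max_loras, so every id should be listed.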
+    set_active_loras(worker, lora_requests)
     assert worker.list_loras() == {
         lora_request.lora_int_id
         for lora_request in lora_requests
@@ -69,8 +97,7 @@ def test_worker_apply_lora(sql_lora_files):
                                             k=random.randint(1, n_loras))
         random.shuffle(iter_lora_requests)
         iter_lora_requests = iter_lora_requests[:-random.randint(0, n_loras)]
-        worker.model_runner.set_active_loras(iter_lora_requests,
-                                             LoRAMapping([], []))
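+        # Activate the random subset; list_loras() must cover its ids.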
+        set_active_loras(worker, iter_lora_requests)
         assert worker.list_loras().issuperset(
             {lora_request.lora_int_id
              for lora_request in iter_lora_requests})