    torch.float32: (5e-3, 5e-3),
    torch.bfloat16: (3e-2, 2e-2),
}
-# TODO: Modify this based on platform
-DEVICES = [
+
+pytestmark = pytest.mark.skipif(
+    not (current_platform.is_cuda_alike() or current_platform.is_cpu()),
+    reason="Backend not supported")
+
+DEVICES = ([
    f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2)
-]
+] if current_platform.is_cuda_alike() else ["cpu"])


# For GPU, we will launch different triton kernels between the prefill and decode
# stages, so we need to verify this. prefill stage(True) or decode stage(False)
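The module-level `pytestmark` plus the conditional `DEVICES` list is the core of this change: every test in the module is skipped unless the backend is CUDA-like or CPU, and the device list falls back to ["cpu"] when no GPU backend is present. A minimal, self-contained sketch of the same pattern, using torch.cuda.is_available() and a hypothetical current_backend() helper in place of vLLM's current_platform:

import pytest
import torch


def current_backend() -> str:
    # Hypothetical stand-in for vLLM's current_platform checks.
    return "cuda" if torch.cuda.is_available() else "cpu"


# Applied to every test in this module, mirroring the pytestmark above.
pytestmark = pytest.mark.skipif(current_backend() not in ("cuda", "cpu"),
                                reason="Backend not supported")

# Use up to two CUDA devices when available, otherwise fall back to CPU.
DEVICES = ([f"cuda:{i}" for i in range(min(torch.cuda.device_count(), 2))]
           if current_backend() == "cuda" else ["cpu"])


@pytest.mark.parametrize("device", DEVICES)
def test_default_device(device):
    torch.set_default_device(device)
    assert torch.empty(1).device.type == torch.device(device).type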
@@ -198,6 +202,10 @@ def check_punica_wrapper(punica_wrapper) -> bool:
        from vllm.lora.punica_wrapper.punica_gpu import PunicaWrapperGPU

        return type(punica_wrapper) is PunicaWrapperGPU
+    elif current_platform.is_cpu():
+        from vllm.lora.punica_wrapper.punica_cpu import PunicaWrapperCPU
+
+        return type(punica_wrapper) is PunicaWrapperCPU
    else:
        return False
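Taken together with its surrounding context, the dispatch helper after this hunk reads roughly as follows (a reconstruction from the diff; the imports stay inside the branches so an unavailable backend module is never imported):

def check_punica_wrapper(punica_wrapper) -> bool:
    if current_platform.is_cuda_alike():
        from vllm.lora.punica_wrapper.punica_gpu import PunicaWrapperGPU

        return type(punica_wrapper) is PunicaWrapperGPU
    elif current_platform.is_cpu():
        from vllm.lora.punica_wrapper.punica_cpu import PunicaWrapperCPU

        return type(punica_wrapper) is PunicaWrapperCPU
    else:
        return False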
@@ -211,7 +219,8 @@ def test_embeddings(dist_init, num_loras, device, vocab_size, stage) -> None:
    # For multi-GPU testing of Triton kernel, we must explicitly set the CUDA
    # device, see: https://github.com/triton-lang/triton/issues/2925
    # Same below.
-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)

    torch.set_default_device(device)
    max_loras = 8
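The same guard recurs in the hunks below. The reason for it: torch.cuda.set_device() rejects non-CUDA devices and fails on hosts without a CUDA runtime, whereas torch.set_default_device() accepts "cpu" as well. A hypothetical helper capturing the pattern, purely for illustration (the tests inline the check instead of using a helper like this):

import torch


def set_test_device(device: str) -> None:
    # Illustrative sketch only: call torch.cuda.set_device() solely when the
    # target is a CUDA device; the default device can be set unconditionally.
    if device.startswith("cuda"):
        torch.cuda.set_device(device)
    torch.set_default_device(device)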
@@ -313,7 +322,9 @@ def create_random_embedding_layer():
def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
                                        vocab_size, stage) -> None:

-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
    torch.set_default_device(device)
    max_loras = 8
    punica_wrapper = get_punica_wrapper(8192, 256, device)
@@ -450,7 +461,9 @@ def create_random_embedding_layer():
def test_lm_head_logits_processor(dist_init, num_loras, device, vocab_size,
                                  stage) -> None:

-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
    torch.set_default_device(device)
    max_loras = 8
    punica_wrapper = get_punica_wrapper(8192, 256, device)
@@ -582,7 +595,9 @@ def _pretest():
def test_linear_replicated(dist_init, num_loras, device, stage,
                           bias_enabled) -> None:

-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
    torch.set_default_device(device)
    punica_wrapper = get_punica_wrapper(8192, 256, device)
    assert check_punica_wrapper(punica_wrapper)
@@ -695,7 +710,9 @@ def create_random_linear_replicated_layer():
def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
                         device, stage, bias_enabled) -> None:

-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
    torch.set_default_device(device)
    punica_wrapper = get_punica_wrapper(8192, 256, device)
    assert check_punica_wrapper(punica_wrapper)
@@ -818,7 +835,9 @@ def create_random_linear_parallel_layer():
def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
                                device, stage, bias_enabled) -> None:

-    torch.cuda.set_device(device)
+    if current_platform.is_cuda_alike():
+        torch.cuda.set_device(device)
+
    torch.set_default_device(device)
    punica_wrapper = get_punica_wrapper(8192, 256, device)
    assert check_punica_wrapper(punica_wrapper)
@@ -971,6 +990,8 @@ class FakeConfig:
@pytest.mark.parametrize("rotary_dim", [None, 32])
@pytest.mark.parametrize("head_size", [32, 108])
@pytest.mark.parametrize("seq_len", [11, 1024])
+@pytest.mark.skipif(not current_platform.is_cuda_alike(),
+                    reason="Only CUDA backends are supported")
def test_rotary_embedding_long_context(dist_init, num_loras, device,
                                       scaling_factors, max_position,
                                       is_neox_style, rotary_dim, head_size,