3434 "LiquidAI/LFM2-1.2B" ,
3535]
3636
37- HF_UNSUPPORTED_MODELS = [
38- # The HF transformers implementation of
39- # Mamba2 is buggy for Codestral as it doesn't handle n_groups, so the test
40- # doesn't compare vLLM output with HF output.
41- # See https://github.com/huggingface/transformers/pull/35943
42- "yujiepan/mamba2-codestral-v0.1-tiny-random" ,
43- # transformers 4.55 is still producing garbage for this model
44- # TODO(tdoublep): follow-up on transformers side
45- "ibm-granite/granite-4.0-tiny-preview"
46- ]
47-
4837V1_SUPPORTED_MODELS = [
4938 "state-spaces/mamba-130m-hf" ,
5039 "ai21labs/Jamba-tiny-dev" ,
@@ -90,20 +79,13 @@ def test_models(
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
         model_info.check_available_online(on_fail="skip")
-        hf_version_check = model_info.check_transformers_version(
-            on_fail="return")
+        model_info.check_transformers_version(on_fail="skip")
     except ValueError:
-        hf_version_check = None
-
-    if hf_version_check is not None:
-        print(f"Skipping transformers comparison because: {hf_version_check}")
+        pass
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS and hf_version_check is None:
-            hf_outputs = hf_model.generate_greedy_logprobs_limit(
-                example_prompts, max_tokens, num_logprobs)
-        else:
-            hf_outputs = None
+        hf_outputs = hf_model.generate_greedy_logprobs_limit(
+            example_prompts, max_tokens, num_logprobs)
 
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "0")
@@ -121,7 +103,7 @@ def test_models(
     else:
         vllm_v1_outputs = None
 
-    if hf_outputs is not None and vllm_v0_outputs is not None:
+    if vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
@@ -130,12 +112,10 @@ def test_models(
         )
 
     if model in V1_SUPPORTED_MODELS:
-        ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
-        assert ref_outputs is not None
         check_logprobs_close(
-            outputs_0_lst=ref_outputs,
+            outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v1_outputs,
-            name_0="hf" if hf_outputs is not None else "vllm-v0",
+            name_0="hf",
             name_1="vllm-v1",
         )
 
@@ -402,11 +382,8 @@ def test_full_cuda_graph(
         pass
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
-            hf_outputs = hf_model.generate_greedy_logprobs_limit(
-                example_prompts, max_tokens, num_logprobs)
-        else:
-            hf_outputs = None
+        hf_outputs = hf_model.generate_greedy_logprobs_limit(
+            example_prompts, max_tokens, num_logprobs)
 
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "0")
@@ -421,20 +398,18 @@ def test_full_cuda_graph(
         vllm_v1_outputs = vllm_model.generate_greedy_logprobs(
             example_prompts, max_tokens, num_logprobs)
 
-    if hf_outputs is not None and vllm_v0_outputs is not None:
+    if vllm_v0_outputs is not None:
         check_logprobs_close(
             outputs_0_lst=hf_outputs,
             outputs_1_lst=vllm_v0_outputs,
             name_0="hf",
             name_1="vllm-v0",
         )
 
-    ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
-    assert ref_outputs is not None
     check_logprobs_close(
-        outputs_0_lst=ref_outputs,
+        outputs_0_lst=hf_outputs,
         outputs_1_lst=vllm_v1_outputs,
-        name_0="hf" if hf_outputs is not None else "vllm-v0",
+        name_0="hf",
         name_1="vllm-v1",
     )
 
@@ -460,11 +435,8 @@ def test_fp32_state(
         pass
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
-            hf_outputs = hf_model.generate_greedy_logprobs_limit(
-                example_prompts, max_tokens, num_logprobs)
-        else:
-            hf_outputs = None
+        hf_outputs = hf_model.generate_greedy_logprobs_limit(
+            example_prompts, max_tokens, num_logprobs)
 
     with monkeypatch.context() as m:
         m.setenv("VLLM_USE_V1", "0")
@@ -480,18 +452,16 @@ def test_fp32_state(
         vllm_v1_outputs = vllm_model.generate_greedy_logprobs(
            example_prompts, max_tokens, num_logprobs)
 
-    if hf_outputs is not None:
-        check_logprobs_close(
-            outputs_0_lst=hf_outputs,
-            outputs_1_lst=vllm_v0_outputs,
-            name_0="hf",
-            name_1="vllm-v0",
-        )
+    check_logprobs_close(
+        outputs_0_lst=hf_outputs,
+        outputs_1_lst=vllm_v0_outputs,
+        name_0="hf",
+        name_1="vllm-v0",
+    )
 
-    ref_outputs = hf_outputs if hf_outputs is not None else vllm_v0_outputs
     check_logprobs_close(
-        outputs_0_lst=ref_outputs,
+        outputs_0_lst=hf_outputs,
        outputs_1_lst=vllm_v1_outputs,
-        name_0="hf" if hf_outputs is not None else "vllm-v0",
+        name_0="hf",
        name_1="vllm-v1",
    )
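
Note: across all three tests, the diff converges on the same flow: hf_outputs is always generated and always used as the reference, so the old "fall back to vllm-v0 as reference" branch disappears. Below is a minimal sketch of that flow, not the test file itself; compare_backends and the generate_* callables are hypothetical stand-ins for the real fixtures, while check_logprobs_close and the "hf"/"vllm-v0"/"vllm-v1" labels are taken from the diff above.

# Sketch only: mirrors the post-diff comparison flow under the assumptions above.
def compare_backends(prompts, max_tokens, num_logprobs,
                     generate_hf, generate_vllm_v0, generate_vllm_v1,
                     check_logprobs_close, v1_supported=True):
    # HF outputs are generated unconditionally (no HF_UNSUPPORTED_MODELS escape hatch).
    hf_outputs = generate_hf(prompts, max_tokens, num_logprobs)

    # vLLM V0 outputs may still be absent (e.g. for V0-unsupported models).
    vllm_v0_outputs = generate_vllm_v0(prompts, max_tokens, num_logprobs)
    if vllm_v0_outputs is not None:
        check_logprobs_close(outputs_0_lst=hf_outputs,
                             outputs_1_lst=vllm_v0_outputs,
                             name_0="hf",
                             name_1="vllm-v0")

    # vLLM V1 is compared directly against HF, never against V0.
    if v1_supported:
        vllm_v1_outputs = generate_vllm_v1(prompts, max_tokens, num_logprobs)
        check_logprobs_close(outputs_0_lst=hf_outputs,
                             outputs_1_lst=vllm_v1_outputs,
                             name_0="hf",
                             name_1="vllm-v1")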