From b8cdfa0a32e691c5bce5489bc4dd71317a08d3c5 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Tue, 3 Dec 2024 14:52:41 +0000 Subject: [PATCH 1/4] [CI] Turn on basic correctness tests for V1 Signed-off-by: Tyler Michael Smith --- tests/basic_correctness/test_basic_correctness.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py index fcba253d159f..bfc2681f0e74 100644 --- a/tests/basic_correctness/test_basic_correctness.py +++ b/tests/basic_correctness/test_basic_correctness.py @@ -26,6 +26,15 @@ TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4") +@pytest.fixture(autouse=True) +def v1(run_with_both_engines): + # Simple autouse wrapper to run both engines for each test + # This can be promoted up to conftest.py to run for every + # test in a package + pass + + + def test_vllm_gc_ed(): """Verify vllm instance is GC'ed when it is deleted""" llm = LLM("facebook/opt-125m") @@ -83,7 +92,7 @@ def test_models( name_1="vllm", ) - +@pytest.mark.skip_v1 @multi_gpu_test(num_gpus=2) @pytest.mark.parametrize( "model, distributed_executor_backend, attention_backend, " @@ -143,6 +152,7 @@ def test_models_distributed( ) +@pytest.mark.skip_v1 def test_model_with_failure(vllm_runner) -> None: try: with patch("vllm.model_executor.models.opt.OPTForCausalLM.forward", @@ -169,6 +179,7 @@ def test_model_with_failure(vllm_runner) -> None: os.remove(filename) +@pytest.mark.skip_v1 def test_failure_with_async_out_proc(vllm_runner) -> None: filename = None From f80427a1d28e0ffc102e8e7c34fb8f74e771387a Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Tue, 3 Dec 2024 15:02:21 +0000 Subject: [PATCH 2/4] format Signed-off-by: Tyler Michael Smith --- tests/basic_correctness/test_basic_correctness.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/basic_correctness/test_basic_correctness.py 
b/tests/basic_correctness/test_basic_correctness.py index bfc2681f0e74..3848d0ccf91b 100644 --- a/tests/basic_correctness/test_basic_correctness.py +++ b/tests/basic_correctness/test_basic_correctness.py @@ -34,7 +34,6 @@ def v1(run_with_both_engines): pass - def test_vllm_gc_ed(): """Verify vllm instance is GC'ed when it is deleted""" llm = LLM("facebook/opt-125m") @@ -92,6 +91,7 @@ def test_models( name_1="vllm", ) + @pytest.mark.skip_v1 @multi_gpu_test(num_gpus=2) @pytest.mark.parametrize( From 8ef4d238a575982a313aef5a45d2d10183cdbda9 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Fri, 6 Dec 2024 10:19:30 -0500 Subject: [PATCH 3/4] Don't skip test_model_with_failure Signed-off-by: Tyler Michael Smith --- tests/basic_correctness/test_basic_correctness.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py index 3848d0ccf91b..707741ae4c66 100644 --- a/tests/basic_correctness/test_basic_correctness.py +++ b/tests/basic_correctness/test_basic_correctness.py @@ -152,7 +152,6 @@ def test_models_distributed( ) -@pytest.mark.skip_v1 def test_model_with_failure(vllm_runner) -> None: try: with patch("vllm.model_executor.models.opt.OPTForCausalLM.forward", From 1f3e007539118e6043c1f8c6960b75c7ca9dfb97 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Wed, 11 Dec 2024 13:38:51 -0500 Subject: [PATCH 4/4] merge Signed-off-by: Tyler Michael Smith --- tests/basic_correctness/test_basic_correctness.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py index b79e69c6c61a..8ee62ad33039 100644 --- a/tests/basic_correctness/test_basic_correctness.py +++ b/tests/basic_correctness/test_basic_correctness.py @@ -44,7 +44,6 @@ def test_vllm_gc_ed(): assert weak_llm() is None -@pytest.mark.skip_v1 @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("backend", 
["FLASH_ATTN", "XFORMERS", "FLASHINFER"]) @pytest.mark.parametrize("dtype", ["half"]) @@ -93,7 +92,6 @@ def test_models( ) -@pytest.mark.skip_v1 @multi_gpu_test(num_gpus=2) @pytest.mark.parametrize( "model, distributed_executor_backend, attention_backend, "