From b8cdfa0a32e691c5bce5489bc4dd71317a08d3c5 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Tue, 3 Dec 2024 14:52:41 +0000
Subject: [PATCH 1/4] [CI] Turn on basic correctness tests for V1

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
---
 tests/basic_correctness/test_basic_correctness.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py
index fcba253d159f..bfc2681f0e74 100644
--- a/tests/basic_correctness/test_basic_correctness.py
+++ b/tests/basic_correctness/test_basic_correctness.py
@@ -26,6 +26,15 @@
 TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4")
 
 
+@pytest.fixture(autouse=True)
+def v1(run_with_both_engines):
+    # Simple autouse wrapper to run both engines for each test.
+    # This can be promoted up to conftest.py to run for every
+    # test in a package.
+    pass
+
+
+
 def test_vllm_gc_ed():
     """Verify vllm instance is GC'ed when it is deleted"""
     llm = LLM("facebook/opt-125m")
@@ -83,7 +92,7 @@ def test_models(
         name_1="vllm",
     )
 
-
+@pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(
     "model, distributed_executor_backend, attention_backend, "
@@ -143,6 +152,7 @@ def test_models_distributed(
     )
 
 
+@pytest.mark.skip_v1
 def test_model_with_failure(vllm_runner) -> None:
     try:
         with patch("vllm.model_executor.models.opt.OPTForCausalLM.forward",
@@ -169,6 +179,7 @@ def test_model_with_failure(vllm_runner) -> None:
         os.remove(filename)
 
 
+@pytest.mark.skip_v1
 def test_failure_with_async_out_proc(vllm_runner) -> None:
 
     filename = None

From f80427a1d28e0ffc102e8e7c34fb8f74e771387a Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Tue, 3 Dec 2024 15:02:21 +0000
Subject: [PATCH 2/4] format

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
---
 tests/basic_correctness/test_basic_correctness.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py
index bfc2681f0e74..3848d0ccf91b 100644
--- a/tests/basic_correctness/test_basic_correctness.py
+++ b/tests/basic_correctness/test_basic_correctness.py
@@ -34,7 +34,6 @@ def v1(run_with_both_engines):
     pass
 
 
-
 def test_vllm_gc_ed():
     """Verify vllm instance is GC'ed when it is deleted"""
     llm = LLM("facebook/opt-125m")
@@ -92,6 +91,7 @@ def test_models(
         name_1="vllm",
     )
 
+
 @pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(

From 8ef4d238a575982a313aef5a45d2d10183cdbda9 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Fri, 6 Dec 2024 10:19:30 -0500
Subject: [PATCH 3/4] Don't skip test_model_with_failure

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
---
 tests/basic_correctness/test_basic_correctness.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py
index 3848d0ccf91b..707741ae4c66 100644
--- a/tests/basic_correctness/test_basic_correctness.py
+++ b/tests/basic_correctness/test_basic_correctness.py
@@ -152,7 +152,6 @@ def test_models_distributed(
     )
 
 
-@pytest.mark.skip_v1
 def test_model_with_failure(vllm_runner) -> None:
     try:
         with patch("vllm.model_executor.models.opt.OPTForCausalLM.forward",

From 1f3e007539118e6043c1f8c6960b75c7ca9dfb97 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith <tyler@neuralmagic.com>
Date: Wed, 11 Dec 2024 13:38:51 -0500
Subject: [PATCH 4/4] merge

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
---
 tests/basic_correctness/test_basic_correctness.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py
index b79e69c6c61a..8ee62ad33039 100644
--- a/tests/basic_correctness/test_basic_correctness.py
+++ b/tests/basic_correctness/test_basic_correctness.py
@@ -44,7 +44,6 @@ def test_vllm_gc_ed():
     assert weak_llm() is None
 
 
-@pytest.mark.skip_v1
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("backend", ["FLASH_ATTN", "XFORMERS", "FLASHINFER"])
 @pytest.mark.parametrize("dtype", ["half"])
@@ -93,7 +92,6 @@ def test_models(
     )
 
 
-@pytest.mark.skip_v1
 @multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(
     "model, distributed_executor_backend, attention_backend, "