diff --git a/sdk/ai/azure-ai-voicelive/assets.json b/sdk/ai/azure-ai-voicelive/assets.json
index f30b3604a3f8..fc857c734577 100644
--- a/sdk/ai/azure-ai-voicelive/assets.json
+++ b/sdk/ai/azure-ai-voicelive/assets.json
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/ai/azure-ai-voicelive",
-  "Tag": "python/ai/azure-ai-voicelive_9c84c5d9c2"
+  "Tag": "python/ai/azure-ai-voicelive_50b02ab530"
 }
diff --git a/sdk/ai/azure-ai-voicelive/pyproject.toml b/sdk/ai/azure-ai-voicelive/pyproject.toml
index 1908161491fc..1e46f6a72aeb 100644
--- a/sdk/ai/azure-ai-voicelive/pyproject.toml
+++ b/sdk/ai/azure-ai-voicelive/pyproject.toml
@@ -76,3 +76,6 @@ pytyped = ["py.typed"]
 [tool.pytest.ini_options]
 asyncio_default_fixture_loop_scope = "function"
 asyncio_mode = "auto"
+markers = [
+    "e2e: marks end-to-end tests that span multiple components or services",
+]
\ No newline at end of file
diff --git a/sdk/ai/azure-ai-voicelive/tests.yml b/sdk/ai/azure-ai-voicelive/tests.yml
index 93510e5783ab..97375f7bd1f8 100644
--- a/sdk/ai/azure-ai-voicelive/tests.yml
+++ b/sdk/ai/azure-ai-voicelive/tests.yml
@@ -6,8 +6,14 @@ extends:
       ServiceDirectory: ai
       TestResourceDirectories:
       - ai/azure-ai-voicelive
+      MatrixConfigs:
+        - Name: ai_ci_matrix
+          Path: sdk/ai/platform-matrix.json
+          Selection: sparse
+          GenerateVMJobs: true
       EnvVars:
-        AZURE_TEST_RUN_LIVE: 'true'
+        AZURE_TEST_RUN_LIVE: 'false'
         AZURE_TEST_USE_CLI_AUTH: 'true'
+        AZURE_SKIP_LIVE_RECORDING: 'true'
         VOICELIVE_OPENAI_ENDPOINT: $(VoiceLiveOpenAIEndpoint)
         VOICELIVE_OPENAI_API_KEY: $(VoiceLiveOpenAIApiKey)
\ No newline at end of file
diff --git a/sdk/ai/azure-ai-voicelive/tests/conftest.py b/sdk/ai/azure-ai-voicelive/tests/conftest.py
index 1685df84d385..1185fff92f49 100644
--- a/sdk/ai/azure-ai-voicelive/tests/conftest.py
+++ b/sdk/ai/azure-ai-voicelive/tests/conftest.py
@@ -7,29 +7,9 @@
 import base64
 from devtools_testutils import test_proxy, is_live
 
-from devtools_testutils.helpers import locate_assets
 from pathlib import Path
 
 
-def pytest_runtest_setup(item):
-    is_live_only_test_marked = bool([mark for mark in item.iter_markers(name="live_test_only")])
-    if is_live_only_test_marked:
-        if not is_live():
-            pytest.skip("live test only")
-
-    is_playback_test_marked = bool([mark for mark in item.iter_markers(name="playback_test_only")])
-    if is_playback_test_marked:
-        if is_live():
-            pytest.skip("playback test only")
-
-
-@pytest.fixture
-def test_data_dir() -> Path:
-    base = Path(locate_assets(current_test_file=Path(__file__)))  # ensure Path
-    full = base / "python" / "sdk" / "ai" / "azure-ai-voicelive" / "tests" / "data"
-    return full
-
-
 @pytest.fixture(scope="session", autouse=True)
 def start_proxy(test_proxy):
     return
diff --git a/sdk/ai/azure-ai-voicelive/tests/test_live_realtime_service.py b/sdk/ai/azure-ai-voicelive/tests/test_live_realtime_service.py
index d44970c4a4c6..ae2201364b65 100644
--- a/sdk/ai/azure-ai-voicelive/tests/test_live_realtime_service.py
+++ b/sdk/ai/azure-ai-voicelive/tests/test_live_realtime_service.py
@@ -4,6 +4,7 @@
 import base64
 import asyncio
 import json
+import os
 
 from pathlib import Path
 from typing import Callable, Iterator, Literal, Mapping, Union, Any, Type
@@ -52,6 +53,7 @@
 )
 
 from devtools_testutils import AzureRecordedTestCase, recorded_by_proxy
+from devtools_testutils.helpers import locate_assets
 from .voicelive_preparer import VoiceLivePreparer
 
 
@@ -159,25 +161,31 @@ async def _collect_audio_trans_outputs(conn, duration_s: float) -> int:
         pass
     return audio_events, trans_events
 
+def _test_data_dir() -> Path:
+    base = Path(locate_assets(current_test_file=Path(__file__)))  # ensure Path
+    full = base / "python" / "sdk" / "ai" / "azure-ai-voicelive" / "tests" / "data"
+    return full
 
 class TestRealtimeService(AzureRecordedTestCase):
 
     @VoiceLivePreparer()
     @recorded_by_proxy
-    def smoke_test(self, **kwargs):
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    def test_smoke(self, **kwargs):  # sanity check: test data directory exists and service settings are set
+        data_path = _test_data_dir()
+        assert data_path.exists() and data_path.is_dir()  # the data directory must be present on disk
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT")  # no default: unset -> None -> assert fails
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY")  # read from the environment, not from kwargs
         assert voicelive_openai_endpoint
         assert voicelive_openai_api_key
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1", "phi4-mm-realtime", "phi4-mini"])
-    async def test_realtime_service(self, test_data_dir: Path, model: str, **kwargs):
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
-        file = test_data_dir / "4.wav"
+    async def test_realtime_service(self, model: str, **kwargs):
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
+        file = _test_data_dir() / "4.wav"
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -220,14 +228,14 @@ async def test_realtime_service(self, test_data_dir: Path, model: str, **kwargs)
             assert audio_delta_evt.type in {ServerEventType.RESPONSE_AUDIO_DELTA}
             assert audio_delta_evt.delta is not None and len(audio_delta_evt.delta) > 0
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
-    async def test_realtime_service_with_audio_enhancements(self, test_data_dir: Path, model: str, **kwargs):
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
-        file = test_data_dir / "4.wav"
+    async def test_realtime_service_with_audio_enhancements(self, model: str, **kwargs):
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
+        file = _test_data_dir() / "4.wav"
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -245,7 +253,7 @@ async def test_realtime_service_with_audio_enhancements(self, test_data_dir: Pat
             audio_segments, _ = await _collect_event(conn, event_type=ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED)
             assert audio_segments == 5
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize(
@@ -264,11 +272,11 @@ async def test_realtime_service_with_audio_enhancements(self, test_data_dir: Pat
         ],
     )
     async def test_realtime_service_with_turn_detection_long_tts_vad_duration(
-        self, test_data_dir: Path, model: str, server_sd_conf: dict, **kwargs
+        self, model: str, server_sd_conf: dict, **kwargs
     ):
-        file = test_data_dir / "4.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        file = _test_data_dir() / "4.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -288,7 +296,7 @@ async def test_realtime_service_with_turn_detection_long_tts_vad_duration(
             assert audio_delta_evt.type in {ServerEventType.RESPONSE_AUDIO_DELTA}
             assert audio_delta_evt.delta is not None and len(audio_delta_evt.delta) > 0
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize(
@@ -306,11 +314,11 @@ async def test_realtime_service_with_turn_detection_long_tts_vad_duration(
         ],
     )
     async def test_realtime_service_with_turn_detection_multilingual(
-        self, test_data_dir: Path, model: str, semantic_vad_params: dict, **kwargs
+        self, model: str, semantic_vad_params: dict, **kwargs
     ):
-        file = test_data_dir / "4.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        file = _test_data_dir() / "4.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -325,7 +333,7 @@ async def test_realtime_service_with_turn_detection_multilingual(
             assert audio_segments == 5
             assert audio_bytes > 0
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize(
@@ -335,11 +343,11 @@ async def test_realtime_service_with_turn_detection_multilingual(
             "filler2_end_24kHz.wav",
         ],
     )
-    async def test_realtime_service_with_filler_word_removal(self, test_data_dir: Path, test_audio_file: str, **kwargs):
+    async def test_realtime_service_with_filler_word_removal(self, test_audio_file: str, **kwargs):
         model = "gpt-4o-realtime-preview"
-        file = test_data_dir / test_audio_file
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        file = _test_data_dir() / test_audio_file
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -352,7 +360,7 @@ async def test_realtime_service_with_filler_word_removal(self, test_data_dir: Pa
             audio_segments, _ = await _collect_event(conn, event_type=ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED)
             assert audio_segments == 1
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize(
@@ -364,16 +372,16 @@ async def test_realtime_service_with_filler_word_removal(self, test_data_dir: Pa
         ],
     )
     async def test_realtime_service_with_filler_word_removal_multilingual(
-        self, test_data_dir: Path, test_audio_file: str, **kwargs
+        self, test_audio_file: str, **kwargs
     ):
         model = "gpt-4o-realtime-preview"
-        file = test_data_dir / test_audio_file
+        file = _test_data_dir() / test_audio_file
         server_sd_conf = {
             "remove_filler_words": True,
         }
 
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -387,14 +395,14 @@ async def test_realtime_service_with_filler_word_removal_multilingual(
             audio_segments, _ = await _collect_event(conn, event_type=ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED)
             assert audio_segments == 1
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4o"])
-    async def test_realtime_service_tool_call(self, test_data_dir: Path, model: str, **kwargs):
-        audio_file = test_data_dir / "one-sentence.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    async def test_realtime_service_tool_call(self, model: str, **kwargs):
+        audio_file = _test_data_dir() / "one-sentence.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -437,16 +445,16 @@ async def test_realtime_service_tool_call(self, test_data_dir: Path, model: str,
 
             assert len(function_call_results) > 0
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview-2025-06-03", "gpt-4o", "gpt-5-chat"])
-    async def test_realtime_service_tool_choice(self, test_data_dir: Path, model: str, **kwargs):
+    async def test_realtime_service_tool_choice(self, model: str, **kwargs):
         if "realtime" in model:
             pytest.skip("Tool choice is not supported in realtime models yet")
-        audio_file = test_data_dir / "ask_weather.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        audio_file = _test_data_dir() / "ask_weather.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -525,20 +533,20 @@ async def test_realtime_service_tool_choice(self, test_data_dir: Path, model: st
             assert function_done.arguments in ['{"location":"北京"}', '{"location":"Beijing"}']
             assert function_done.name == "get_time"
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4.1", "gpt-5", "phi4-mm-realtime"])
-    async def test_realtime_service_tool_call_parameter(self, test_data_dir: Path, model: str, **kwargs):
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    async def test_realtime_service_tool_call_parameter(self, model: str, **kwargs):
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
 
         def get_weather(arguments: Union[str, Mapping[str, Any]]) -> str:
             return json.dumps({"location": "Beijing", "weather": "sunny", "temp_c": 25})
 
         if "realtime" in model:
             pytest.skip("Tool choice is not supported in realtime models yet")
-        audio_file = test_data_dir / "ask_weather.wav"
+        audio_file = _test_data_dir() / "ask_weather.wav"
         tools = [
             FunctionTool(
                 name="get_weather",
@@ -605,14 +613,14 @@ def get_weather(arguments: Union[str, Mapping[str, Any]]) -> str:
             assert "晴" in transcript or "sunny" in transcript
             assert "25" in transcript
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o", "gpt-4o-realtime"])
-    async def test_realtime_service_live_session_update(self, test_data_dir: Path, model: str, **kwargs):
-        audio_file = test_data_dir / "ask_weather.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    async def test_realtime_service_live_session_update(self, model: str, **kwargs):
+        audio_file = _test_data_dir() / "ask_weather.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -671,14 +679,14 @@ async def test_realtime_service_live_session_update(self, test_data_dir: Path, m
             assert audio_bytes > 50 * 1000
             assert transcripts == 1
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o", "gpt-4o-realtime"])
-    async def test_realtime_service_tool_call_no_audio_overlap(self, test_data_dir: Path, model: str, **kwargs):
-        audio_file = test_data_dir / "audio_overlap.input_audio1.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    async def test_realtime_service_tool_call_no_audio_overlap(self, model: str, **kwargs):
+        audio_file = _test_data_dir() / "audio_overlap.input_audio1.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         tools = [
             FunctionTool(
                 name="fetch_merchant_details",
@@ -723,21 +731,20 @@ async def test_realtime_service_tool_call_no_audio_overlap(self, test_data_dir:
 
             assert len(message_types) == 2
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime"])
     @pytest.mark.parametrize("transcription_model", ["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"])
     async def test_realtime_service_input_audio_transcription(
         self,
-        test_data_dir: Path,
         model: str,
         transcription_model: Literal["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"],
         **kwargs,
     ):
-        file = test_data_dir / "largest_lake.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        file = _test_data_dir() / "largest_lake.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -757,7 +764,7 @@ async def test_realtime_service_input_audio_transcription(
 
             assert input_audio_transcription_completed_evt.transcript.strip() == "What's the largest lake in the world?"
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize(
@@ -783,15 +790,14 @@ async def test_realtime_service_input_audio_transcription(
     )
     async def test_realtime_service_with_eou(
         self,
-        test_data_dir: Path,
         model: str,
         turn_detection_cls: Type[Union["ServerVad", "AzureSemanticVad", "AzureSemanticVadMultilingual"]],
         end_of_detection: Type[Union["AzureSemanticDetection", "AzureSemanticDetectionEn"]],
         **kwargs,
     ):
-        file = test_data_dir / "phone.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        file = _test_data_dir() / "phone.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         turn_detection = turn_detection_cls(end_of_utterance_detection=end_of_detection(timeout_ms=2000))
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
@@ -808,17 +814,17 @@ async def test_realtime_service_with_eou(
             assert events > 0
             assert audio_bytes > 0
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
-    async def test_realtime_service_with_audio_timestamp_viseme(self, test_data_dir: Path, model: str, **kwargs):
-        file = test_data_dir / "4.wav"
+    async def test_realtime_service_with_audio_timestamp_viseme(self, model: str, **kwargs):
+        file = _test_data_dir() / "4.wav"
         response_audio_word_timestamps = []
         response_blendshape_visemes = []
         audio_bytes = 0
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -856,14 +862,14 @@ async def test_realtime_service_with_audio_timestamp_viseme(self, test_data_dir:
             assert len(response_audio_word_timestamps) > 0
             assert len(response_blendshape_visemes) > 0
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4o", "phi4-mm-realtime", "phi4-mini"])
-    async def test_realtime_service_wo_turn_detection(self, test_data_dir: Path, model: str, **kwargs):
-        file = test_data_dir / "ask_weather.mp3"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    async def test_realtime_service_wo_turn_detection(self, model: str, **kwargs):
+        file = _test_data_dir() / "ask_weather.mp3"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -883,14 +889,14 @@ async def test_realtime_service_wo_turn_detection(self, test_data_dir: Path, mod
             assert audio_events > 0
             assert trans_events > 0
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4.1", "phi4-mm-realtime"])
-    async def test_realtime_service_with_voice_properties(self, test_data_dir: Path, model: str, **kwargs):
-        file = test_data_dir / "largest_lake.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    async def test_realtime_service_with_voice_properties(self, model: str, **kwargs):
+        file = _test_data_dir() / "largest_lake.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -909,14 +915,14 @@ async def test_realtime_service_with_voice_properties(self, test_data_dir: Path,
             )
             assert content_part_added_events == 1
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime"])
-    async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model: str, **kwargs):
-        file = test_data_dir / "largest_lake.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    async def test_realtime_service_retrieve_item(self, model: str, **kwargs):
+        file = _test_data_dir() / "largest_lake.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -948,14 +954,14 @@ async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model:
                 conversation_retrieved_event.item.content[0], ContentPart
             ), f"Retrieved item content should be audio: {conversation_retrieved_event.item.content[0]}."
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime"])
-    async def test_realtime_service_truncate_item(self, test_data_dir: Path, model: str, **kwargs):
-        file = test_data_dir / "largest_lake.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+    async def test_realtime_service_truncate_item(self, model: str, **kwargs):
+        file = _test_data_dir() / "largest_lake.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -977,7 +983,7 @@ async def test_realtime_service_truncate_item(self, test_data_dir: Path, model:
                 conversation_retrieved_event, ServerEventConversationItemTruncated
             ), f"Retrieved item should be an ServerEventConversationItemTruncated: {conversation_retrieved_event}."
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize(
@@ -1040,7 +1046,7 @@ async def test_realtime_service_truncate_item(self, test_data_dir: Path, model:
         ],
     )
     async def test_realtime_service_with_input_audio_format(
-        self, test_data_dir: Path, model: str, audio_format: InputAudioFormat, turn_detection: TurnDetection, **kwargs
+        self, model: str, audio_format: InputAudioFormat, turn_detection: TurnDetection, **kwargs
     ):
         """Test that all supported input_audio_format values work correctly with all models.
 
@@ -1049,15 +1055,15 @@ async def test_realtime_service_with_input_audio_format(
         the service can process audio properly regardless of the input format.
         """
 
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         # Use the appropriate audio file for each format
         if audio_format == InputAudioFormat.PCM16:
-            audio_file = test_data_dir / "largest_lake.wav"
+            audio_file = _test_data_dir() / "largest_lake.wav"
         elif audio_format == InputAudioFormat.G711_ULAW:
-            audio_file = test_data_dir / "largest_lake.ulaw"
+            audio_file = _test_data_dir() / "largest_lake.ulaw"
         elif audio_format == InputAudioFormat.G711_ALAW:
-            audio_file = test_data_dir / "largest_lake.alaw"
+            audio_file = _test_data_dir() / "largest_lake.alaw"
         else:
             raise ValueError(f"Unsupported audio format: {audio_format}")
 
@@ -1088,7 +1094,7 @@ async def test_realtime_service_with_input_audio_format(
             _, audio_bytes = await _collect_event(conn, event_type=None)
             assert audio_bytes > 50 * 1000, f"Output audio too short for {audio_format} format: {audio_bytes} bytes"
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize(
@@ -1105,7 +1111,7 @@ async def test_realtime_service_with_input_audio_format(
         ],
     )
     async def test_realtime_service_with_input_audio_sampling_rate(
-        self, test_data_dir: Path, model: str, sampling_rate: int, **kwargs
+        self, model: str, sampling_rate: int, **kwargs
     ):
         """Test that the realtime service works correctly with different input audio sampling rates.
 
@@ -1116,10 +1122,10 @@ async def test_realtime_service_with_input_audio_sampling_rate(
         4. Both resampling enabled and disabled scenarios work correctly
         """
 
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         # Use the specified audio file
-        audio_file = test_data_dir / f"largest_lake.{sampling_rate // 1000}kHz.wav"
+        audio_file = _test_data_dir() / f"largest_lake.{sampling_rate // 1000}kHz.wav"
 
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
@@ -1146,7 +1152,7 @@ async def test_realtime_service_with_input_audio_sampling_rate(
             _, audio_bytes = await _collect_event(conn, event_type=ServerEventType.RESPONSE_AUDIO_TRANSCRIPT_DELTA)
             assert audio_bytes > 50 * 1000, f"Output audio too short for {audio_file}: {audio_bytes} bytes"
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4.1", "phi4-mini"])
@@ -1165,11 +1171,11 @@ async def test_realtime_service_with_input_audio_sampling_rate(
         ],
     )
     async def test_output_formats_with_azure_voice(
-        self, test_data_dir: Path, model: str, audio_output_format: str, **kwargs
+        self, model: str, audio_output_format: str, **kwargs
     ):
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
-        audio_file = test_data_dir / "largest_lake.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
+        audio_file = _test_data_dir() / "largest_lake.wav"
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -1189,7 +1195,7 @@ async def test_output_formats_with_azure_voice(
             assert events == 1
             assert audio_bytes > 10 * 1024
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime"])
@@ -1202,11 +1208,11 @@ async def test_output_formats_with_azure_voice(
         ],
     )
     async def test_output_formats_with_openai_voice(
-        self, test_data_dir: Path, model: str, audio_output_format: str, **kwargs
+        self, model: str, audio_output_format: str, **kwargs
     ):
-        audio_file = test_data_dir / "largest_lake.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        audio_file = _test_data_dir() / "largest_lake.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -1226,15 +1232,15 @@ async def test_output_formats_with_openai_voice(
             assert events == 1
             assert audio_bytes > 10 * 1024
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"])
-    async def test_realtime_service_with_echo_cancellation(self, test_data_dir: Path, model: str, **kwargs):
+    async def test_realtime_service_with_echo_cancellation(self, model: str, **kwargs):
         """Test echo cancellation in the realtime service."""
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
-        file = test_data_dir / "4.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
+        file = _test_data_dir() / "4.wav"
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
@@ -1252,7 +1258,7 @@ async def test_realtime_service_with_echo_cancellation(self, test_data_dir: Path
             assert segments > 1, "Expected more than 1 speech segment"
             assert audio_bytes > 0, "Audio bytes should be greater than 0"
 
-    @pytest.mark.live_test_only
+    @pytest.mark.e2e
     @VoiceLivePreparer()
     @pytest.mark.flaky(reruns=3, reruns_delay=2)
     @pytest.mark.parametrize("model", ["gpt-4.1", "phi4-mm-realtime", "phi4-mini"])
@@ -1271,12 +1277,12 @@ async def test_realtime_service_with_echo_cancellation(self, test_data_dir: Path
         ],
     )
     async def test_write_loopback_audio_echo_cancellation(
-        self, test_data_dir: Path, model: str, audio_output_format: str, **kwargs
+        self, model: str, audio_output_format: str, **kwargs
     ):
         """Test echo cancellation functionality with write_loopback_audio for different audio formats."""
-        audio_file = test_data_dir / "largest_lake.wav"
-        voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint")
-        voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key")
+        audio_file = _test_data_dir() / "largest_lake.wav"
+        voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint")
+        voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key")
         async with connect(
             endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model
         ) as conn:
diff --git a/sdk/ai/azure-ai-voicelive/tests/voicelive_preparer.py b/sdk/ai/azure-ai-voicelive/tests/voicelive_preparer.py
index ac382f96e6a9..cdc54ef79fe1 100644
--- a/sdk/ai/azure-ai-voicelive/tests/voicelive_preparer.py
+++ b/sdk/ai/azure-ai-voicelive/tests/voicelive_preparer.py
@@ -1,10 +1,10 @@
 import functools
 import os
-from devtools_testutils import AzureRecordedTestCase, EnvironmentVariableLoader, recorded_by_proxy
+from devtools_testutils import AzureRecordedTestCase, EnvironmentVariableLoader, recorded_by_proxy, is_live
 
 VoiceLivePreparer = functools.partial(
     EnvironmentVariableLoader,
     "voicelive",
-    voicelive_openai_endpoint=os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint"),
-    voicelive_openai_api_key=os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key"),
+    voicelive_openai_endpoint="fake_endpoint",
+    voicelive_openai_api_key="fake_api_key",
 )
diff --git a/sdk/ai/ci.yml b/sdk/ai/ci.yml
index 20ceb03e1897..d9adc932b832 100644
--- a/sdk/ai/ci.yml
+++ b/sdk/ai/ci.yml
@@ -35,7 +35,7 @@ extends:
     # The job "Test ubuntu2404_pypy39" in the "python - ai" pipeline hangs and eventually times out.
     # Disable it until the issue is understood.
     MatrixConfigs:
-      - Name: communication_ci_matrix
+      - Name: ai_ci_matrix
         Path: sdk/ai/platform-matrix.json
         Selection: sparse
         GenerateVMJobs: true