diff --git a/sdk/ai/azure-ai-voicelive/assets.json b/sdk/ai/azure-ai-voicelive/assets.json index f30b3604a3f8..fc857c734577 100644 --- a/sdk/ai/azure-ai-voicelive/assets.json +++ b/sdk/ai/azure-ai-voicelive/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "python", "TagPrefix": "python/ai/azure-ai-voicelive", - "Tag": "python/ai/azure-ai-voicelive_9c84c5d9c2" + "Tag": "python/ai/azure-ai-voicelive_50b02ab530" } diff --git a/sdk/ai/azure-ai-voicelive/pyproject.toml b/sdk/ai/azure-ai-voicelive/pyproject.toml index 1908161491fc..1e46f6a72aeb 100644 --- a/sdk/ai/azure-ai-voicelive/pyproject.toml +++ b/sdk/ai/azure-ai-voicelive/pyproject.toml @@ -76,3 +76,6 @@ pytyped = ["py.typed"] [tool.pytest.ini_options] asyncio_default_fixture_loop_scope = "function" asyncio_mode = "auto" +markers = [ + "e2e: marks end-to-end tests that span multiple components or services", +] \ No newline at end of file diff --git a/sdk/ai/azure-ai-voicelive/tests.yml b/sdk/ai/azure-ai-voicelive/tests.yml index 93510e5783ab..97375f7bd1f8 100644 --- a/sdk/ai/azure-ai-voicelive/tests.yml +++ b/sdk/ai/azure-ai-voicelive/tests.yml @@ -6,8 +6,14 @@ extends: ServiceDirectory: ai TestResourceDirectories: - ai/azure-ai-voicelive + MatrixConfigs: + - Name: ai_ci_matrix + Path: sdk/ai/platform-matrix.json + Selection: sparse + GenerateVMJobs: true EnvVars: - AZURE_TEST_RUN_LIVE: 'true' + AZURE_TEST_RUN_LIVE: 'false' AZURE_TEST_USE_CLI_AUTH: 'true' + AZURE_SKIP_LIVE_RECORDING: 'true' VOICELIVE_OPENAI_ENDPOINT: $(VoiceLiveOpenAIEndpoint) VOICELIVE_OPENAI_API_KEY: $(VoiceLiveOpenAIApiKey) \ No newline at end of file diff --git a/sdk/ai/azure-ai-voicelive/tests/conftest.py b/sdk/ai/azure-ai-voicelive/tests/conftest.py index 1685df84d385..1185fff92f49 100644 --- a/sdk/ai/azure-ai-voicelive/tests/conftest.py +++ b/sdk/ai/azure-ai-voicelive/tests/conftest.py @@ -7,29 +7,9 @@ import base64 from devtools_testutils import test_proxy, is_live -from devtools_testutils.helpers import locate_assets from pathlib import Path -def pytest_runtest_setup(item): - is_live_only_test_marked = bool([mark for mark in item.iter_markers(name="live_test_only")]) - if is_live_only_test_marked: - if not is_live(): - pytest.skip("live test only") - - is_playback_test_marked = bool([mark for mark in item.iter_markers(name="playback_test_only")]) - if is_playback_test_marked: - if is_live(): - pytest.skip("playback test only") - - -@pytest.fixture -def test_data_dir() -> Path: - base = Path(locate_assets(current_test_file=Path(__file__))) # ensure Path - full = base / "python" / "sdk" / "ai" / "azure-ai-voicelive" / "tests" / "data" - return full - - @pytest.fixture(scope="session", autouse=True) def start_proxy(test_proxy): return diff --git a/sdk/ai/azure-ai-voicelive/tests/test_live_realtime_service.py b/sdk/ai/azure-ai-voicelive/tests/test_live_realtime_service.py index d44970c4a4c6..ae2201364b65 100644 --- a/sdk/ai/azure-ai-voicelive/tests/test_live_realtime_service.py +++ b/sdk/ai/azure-ai-voicelive/tests/test_live_realtime_service.py @@ -4,6 +4,7 @@ import base64 import asyncio import json +import os from pathlib import Path from typing import Callable, Iterator, Literal, Mapping, Union, Any, Type @@ -52,6 +53,7 @@ ) from devtools_testutils import AzureRecordedTestCase, recorded_by_proxy +from devtools_testutils.helpers import locate_assets from .voicelive_preparer import VoiceLivePreparer @@ -159,25 +161,31 @@ async def _collect_audio_trans_outputs(conn, duration_s: float) -> int: pass return audio_events, trans_events +def _test_data_dir() -> Path: + base = Path(locate_assets(current_test_file=Path(__file__))) # ensure Path + full = base / "python" / "sdk" / "ai" / "azure-ai-voicelive" / "tests" / "data" + return full class TestRealtimeService(AzureRecordedTestCase): @VoiceLivePreparer() @recorded_by_proxy - def smoke_test(self, **kwargs): - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + def test_smoke(self, **kwargs): + data_path = _test_data_dir() + assert data_path.exists() and data_path.is_dir() + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY") assert voicelive_openai_endpoint assert voicelive_openai_api_key - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1", "phi4-mm-realtime", "phi4-mini"]) - async def test_realtime_service(self, test_data_dir: Path, model: str, **kwargs): - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") - file = test_data_dir / "4.wav" + async def test_realtime_service(self, model: str, **kwargs): + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") + file = _test_data_dir() / "4.wav" async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -220,14 +228,14 @@ async def test_realtime_service(self, test_data_dir: Path, model: str, **kwargs) assert audio_delta_evt.type in {ServerEventType.RESPONSE_AUDIO_DELTA} assert audio_delta_evt.delta is not None and len(audio_delta_evt.delta) > 0 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"]) - async def test_realtime_service_with_audio_enhancements(self, test_data_dir: Path, model: str, **kwargs): - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") - file = test_data_dir / "4.wav" + async def test_realtime_service_with_audio_enhancements(self, model: str, **kwargs): + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") + file = _test_data_dir() / "4.wav" async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -245,7 +253,7 @@ async def test_realtime_service_with_audio_enhancements(self, test_data_dir: Pat audio_segments, _ = await _collect_event(conn, event_type=ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED) assert audio_segments == 5 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize( @@ -264,11 +272,11 @@ async def test_realtime_service_with_audio_enhancements(self, test_data_dir: Pat ], ) async def test_realtime_service_with_turn_detection_long_tts_vad_duration( - self, test_data_dir: Path, model: str, server_sd_conf: dict, **kwargs + self, model: str, server_sd_conf: dict, **kwargs ): - file = test_data_dir / "4.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + file = _test_data_dir() / "4.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -288,7 +296,7 @@ async def test_realtime_service_with_turn_detection_long_tts_vad_duration( assert audio_delta_evt.type in {ServerEventType.RESPONSE_AUDIO_DELTA} assert audio_delta_evt.delta is not None and len(audio_delta_evt.delta) > 0 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize( @@ -306,11 +314,11 @@ async def test_realtime_service_with_turn_detection_long_tts_vad_duration( ], ) async def test_realtime_service_with_turn_detection_multilingual( - self, test_data_dir: Path, model: str, semantic_vad_params: dict, **kwargs + self, model: str, semantic_vad_params: dict, **kwargs ): - file = test_data_dir / "4.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + file = _test_data_dir() / "4.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -325,7 +333,7 @@ async def test_realtime_service_with_turn_detection_multilingual( assert audio_segments == 5 assert audio_bytes > 0 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize( @@ -335,11 +343,11 @@ async def test_realtime_service_with_turn_detection_multilingual( "filler2_end_24kHz.wav", ], ) - async def test_realtime_service_with_filler_word_removal(self, test_data_dir: Path, test_audio_file: str, **kwargs): + async def test_realtime_service_with_filler_word_removal(self, test_audio_file: str, **kwargs): model = "gpt-4o-realtime-preview" - file = test_data_dir / test_audio_file - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + file = _test_data_dir() / test_audio_file + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -352,7 +360,7 @@ async def test_realtime_service_with_filler_word_removal(self, test_data_dir: Pa audio_segments, _ = await _collect_event(conn, event_type=ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED) assert audio_segments == 1 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize( @@ -364,16 +372,16 @@ async def test_realtime_service_with_filler_word_removal(self, test_data_dir: Pa ], ) async def test_realtime_service_with_filler_word_removal_multilingual( - self, test_data_dir: Path, test_audio_file: str, **kwargs + self, test_audio_file: str, **kwargs ): model = "gpt-4o-realtime-preview" - file = test_data_dir / test_audio_file + file = _test_data_dir() / test_audio_file server_sd_conf = { "remove_filler_words": True, } - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -387,14 +395,14 @@ async def test_realtime_service_with_filler_word_removal_multilingual( audio_segments, _ = await _collect_event(conn, event_type=ServerEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED) assert audio_segments == 1 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4o"]) - async def test_realtime_service_tool_call(self, test_data_dir: Path, model: str, **kwargs): - audio_file = test_data_dir / "one-sentence.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + async def test_realtime_service_tool_call(self, model: str, **kwargs): + audio_file = _test_data_dir() / "one-sentence.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -437,16 +445,16 @@ async def test_realtime_service_tool_call(self, test_data_dir: Path, model: str, assert len(function_call_results) > 0 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview-2025-06-03", "gpt-4o", "gpt-5-chat"]) - async def test_realtime_service_tool_choice(self, test_data_dir: Path, model: str, **kwargs): + async def test_realtime_service_tool_choice(self, model: str, **kwargs): if "realtime" in model: pytest.skip("Tool choice is not supported in realtime models yet") - audio_file = test_data_dir / "ask_weather.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + audio_file = _test_data_dir() / "ask_weather.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -525,20 +533,20 @@ async def test_realtime_service_tool_choice(self, test_data_dir: Path, model: st assert function_done.arguments in ['{"location":"北京"}', '{"location":"Beijing"}'] assert function_done.name == "get_time" - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4.1", "gpt-5", "phi4-mm-realtime"]) - async def test_realtime_service_tool_call_parameter(self, test_data_dir: Path, model: str, **kwargs): - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + async def test_realtime_service_tool_call_parameter(self, model: str, **kwargs): + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") def get_weather(arguments: Union[str, Mapping[str, Any]]) -> str: return json.dumps({"location": "Beijing", "weather": "sunny", "temp_c": 25}) if "realtime" in model: pytest.skip("Tool choice is not supported in realtime models yet") - audio_file = test_data_dir / "ask_weather.wav" + audio_file = _test_data_dir() / "ask_weather.wav" tools = [ FunctionTool( name="get_weather", @@ -605,14 +613,14 @@ def get_weather(arguments: Union[str, Mapping[str, Any]]) -> str: assert "晴" in transcript or "sunny" in transcript assert "25" in transcript - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o", "gpt-4o-realtime"]) - async def test_realtime_service_live_session_update(self, test_data_dir: Path, model: str, **kwargs): - audio_file = test_data_dir / "ask_weather.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + async def test_realtime_service_live_session_update(self, model: str, **kwargs): + audio_file = _test_data_dir() / "ask_weather.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -671,14 +679,14 @@ async def test_realtime_service_live_session_update(self, test_data_dir: Path, m assert audio_bytes > 50 * 1000 assert transcripts == 1 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o", "gpt-4o-realtime"]) - async def test_realtime_service_tool_call_no_audio_overlap(self, test_data_dir: Path, model: str, **kwargs): - audio_file = test_data_dir / "audio_overlap.input_audio1.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + async def test_realtime_service_tool_call_no_audio_overlap(self, model: str, **kwargs): + audio_file = _test_data_dir() / "audio_overlap.input_audio1.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") tools = [ FunctionTool( name="fetch_merchant_details", @@ -723,21 +731,20 @@ async def test_realtime_service_tool_call_no_audio_overlap(self, test_data_dir: assert len(message_types) == 2 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime"]) @pytest.mark.parametrize("transcription_model", ["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]) async def test_realtime_service_input_audio_transcription( self, - test_data_dir: Path, model: str, transcription_model: Literal["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"], **kwargs, ): - file = test_data_dir / "largest_lake.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + file = _test_data_dir() / "largest_lake.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -757,7 +764,7 @@ async def test_realtime_service_input_audio_transcription( assert input_audio_transcription_completed_evt.transcript.strip() == "What's the largest lake in the world?" - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize( @@ -783,15 +790,14 @@ async def test_realtime_service_input_audio_transcription( ) async def test_realtime_service_with_eou( self, - test_data_dir: Path, model: str, turn_detection_cls: Type[Union["ServerVad", "AzureSemanticVad", "AzureSemanticVadMultilingual"]], end_of_detection: Type[Union["AzureSemanticDetection", "AzureSemanticDetectionEn"]], **kwargs, ): - file = test_data_dir / "phone.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + file = _test_data_dir() / "phone.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") turn_detection = turn_detection_cls(end_of_utterance_detection=end_of_detection(timeout_ms=2000)) async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model @@ -808,17 +814,17 @@ async def test_realtime_service_with_eou( assert events > 0 assert audio_bytes > 0 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"]) - async def test_realtime_service_with_audio_timestamp_viseme(self, test_data_dir: Path, model: str, **kwargs): - file = test_data_dir / "4.wav" + async def test_realtime_service_with_audio_timestamp_viseme(self, model: str, **kwargs): + file = _test_data_dir() / "4.wav" response_audio_word_timestamps = [] response_blendshape_visemes = [] audio_bytes = 0 - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -856,14 +862,14 @@ async def test_realtime_service_with_audio_timestamp_viseme(self, test_data_dir: assert len(response_audio_word_timestamps) > 0 assert len(response_blendshape_visemes) > 0 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4o", "phi4-mm-realtime", "phi4-mini"]) - async def test_realtime_service_wo_turn_detection(self, test_data_dir: Path, model: str, **kwargs): - file = test_data_dir / "ask_weather.mp3" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + async def test_realtime_service_wo_turn_detection(self, model: str, **kwargs): + file = _test_data_dir() / "ask_weather.mp3" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -883,14 +889,14 @@ async def test_realtime_service_wo_turn_detection(self, test_data_dir: Path, mod assert audio_events > 0 assert trans_events > 0 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime", "gpt-4.1", "phi4-mm-realtime"]) - async def test_realtime_service_with_voice_properties(self, test_data_dir: Path, model: str, **kwargs): - file = test_data_dir / "largest_lake.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + async def test_realtime_service_with_voice_properties(self, model: str, **kwargs): + file = _test_data_dir() / "largest_lake.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -909,14 +915,14 @@ async def test_realtime_service_with_voice_properties(self, test_data_dir: Path, ) assert content_part_added_events == 1 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime"]) - async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model: str, **kwargs): - file = test_data_dir / "largest_lake.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + async def test_realtime_service_retrieve_item(self, model: str, **kwargs): + file = _test_data_dir() / "largest_lake.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -948,14 +954,14 @@ async def test_realtime_service_retrieve_item(self, test_data_dir: Path, model: conversation_retrieved_event.item.content[0], ContentPart ), f"Retrieved item content should be audio: {conversation_retrieved_event.item.content[0]}." - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime"]) - async def test_realtime_service_truncate_item(self, test_data_dir: Path, model: str, **kwargs): - file = test_data_dir / "largest_lake.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + async def test_realtime_service_truncate_item(self, model: str, **kwargs): + file = _test_data_dir() / "largest_lake.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -977,7 +983,7 @@ async def test_realtime_service_truncate_item(self, test_data_dir: Path, model: conversation_retrieved_event, ServerEventConversationItemTruncated ), f"Retrieved item should be an ServerEventConversationItemTruncated: {conversation_retrieved_event}." - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize( @@ -1040,7 +1046,7 @@ async def test_realtime_service_truncate_item(self, test_data_dir: Path, model: ], ) async def test_realtime_service_with_input_audio_format( - self, test_data_dir: Path, model: str, audio_format: InputAudioFormat, turn_detection: TurnDetection, **kwargs + self, model: str, audio_format: InputAudioFormat, turn_detection: TurnDetection, **kwargs ): """Test that all supported input_audio_format values work correctly with all models. @@ -1049,15 +1055,15 @@ async def test_realtime_service_with_input_audio_format( the service can process audio properly regardless of the input format. """ - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") # Use the appropriate audio file for each format if audio_format == InputAudioFormat.PCM16: - audio_file = test_data_dir / "largest_lake.wav" + audio_file = _test_data_dir() / "largest_lake.wav" elif audio_format == InputAudioFormat.G711_ULAW: - audio_file = test_data_dir / "largest_lake.ulaw" + audio_file = _test_data_dir() / "largest_lake.ulaw" elif audio_format == InputAudioFormat.G711_ALAW: - audio_file = test_data_dir / "largest_lake.alaw" + audio_file = _test_data_dir() / "largest_lake.alaw" else: raise ValueError(f"Unsupported audio format: {audio_format}") @@ -1088,7 +1094,7 @@ async def test_realtime_service_with_input_audio_format( _, audio_bytes = await _collect_event(conn, event_type=None) assert audio_bytes > 50 * 1000, f"Output audio too short for {audio_format} format: {audio_bytes} bytes" - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize( @@ -1105,7 +1111,7 @@ async def test_realtime_service_with_input_audio_format( ], ) async def test_realtime_service_with_input_audio_sampling_rate( - self, test_data_dir: Path, model: str, sampling_rate: int, **kwargs + self, model: str, sampling_rate: int, **kwargs ): """Test that the realtime service works correctly with different input audio sampling rates. @@ -1116,10 +1122,10 @@ async def test_realtime_service_with_input_audio_sampling_rate( 4. Both resampling enabled and disabled scenarios work correctly """ - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") # Use the specified audio file - audio_file = test_data_dir / f"largest_lake.{sampling_rate // 1000}kHz.wav" + audio_file = _test_data_dir() / f"largest_lake.{sampling_rate // 1000}kHz.wav" async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model @@ -1146,7 +1152,7 @@ async def test_realtime_service_with_input_audio_sampling_rate( _, audio_bytes = await _collect_event(conn, event_type=ServerEventType.RESPONSE_AUDIO_TRANSCRIPT_DELTA) assert audio_bytes > 50 * 1000, f"Output audio too short for {audio_file}: {audio_bytes} bytes" - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4.1", "phi4-mini"]) @@ -1165,11 +1171,11 @@ async def test_realtime_service_with_input_audio_sampling_rate( ], ) async def test_output_formats_with_azure_voice( - self, test_data_dir: Path, model: str, audio_output_format: str, **kwargs + self, model: str, audio_output_format: str, **kwargs ): - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") - audio_file = test_data_dir / "largest_lake.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") + audio_file = _test_data_dir() / "largest_lake.wav" async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -1189,7 +1195,7 @@ async def test_output_formats_with_azure_voice( assert events == 1 assert audio_bytes > 10 * 1024 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime"]) @@ -1202,11 +1208,11 @@ async def test_output_formats_with_azure_voice( ], ) async def test_output_formats_with_openai_voice( - self, test_data_dir: Path, model: str, audio_output_format: str, **kwargs + self, model: str, audio_output_format: str, **kwargs ): - audio_file = test_data_dir / "largest_lake.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + audio_file = _test_data_dir() / "largest_lake.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -1226,15 +1232,15 @@ async def test_output_formats_with_openai_voice( assert events == 1 assert audio_bytes > 10 * 1024 - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4o-realtime-preview", "gpt-4.1"]) - async def test_realtime_service_with_echo_cancellation(self, test_data_dir: Path, model: str, **kwargs): + async def test_realtime_service_with_echo_cancellation(self, model: str, **kwargs): """Test echo cancellation in the realtime service.""" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") - file = test_data_dir / "4.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") + file = _test_data_dir() / "4.wav" async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: @@ -1252,7 +1258,7 @@ async def test_realtime_service_with_echo_cancellation(self, test_data_dir: Path assert segments > 1, "Expected more than 1 speech segment" assert audio_bytes > 0, "Audio bytes should be greater than 0" - @pytest.mark.live_test_only + @pytest.mark.e2e @VoiceLivePreparer() @pytest.mark.flaky(reruns=3, reruns_delay=2) @pytest.mark.parametrize("model", ["gpt-4.1", "phi4-mm-realtime", "phi4-mini"]) @@ -1271,12 +1277,12 @@ async def test_realtime_service_with_echo_cancellation(self, test_data_dir: Path ], ) async def test_write_loopback_audio_echo_cancellation( - self, test_data_dir: Path, model: str, audio_output_format: str, **kwargs + self, model: str, audio_output_format: str, **kwargs ): """Test echo cancellation functionality with write_loopback_audio for different audio formats.""" - audio_file = test_data_dir / "largest_lake.wav" - voicelive_openai_endpoint = kwargs.pop("voicelive_openai_endpoint") - voicelive_openai_api_key = kwargs.pop("voicelive_openai_api_key") + audio_file = _test_data_dir() / "largest_lake.wav" + voicelive_openai_endpoint = os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint") + voicelive_openai_api_key = os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key") async with connect( endpoint=voicelive_openai_endpoint, credential=AzureKeyCredential(voicelive_openai_api_key), model=model ) as conn: diff --git a/sdk/ai/azure-ai-voicelive/tests/voicelive_preparer.py b/sdk/ai/azure-ai-voicelive/tests/voicelive_preparer.py index ac382f96e6a9..cdc54ef79fe1 100644 --- a/sdk/ai/azure-ai-voicelive/tests/voicelive_preparer.py +++ b/sdk/ai/azure-ai-voicelive/tests/voicelive_preparer.py @@ -1,10 +1,10 @@ import functools import os -from devtools_testutils import AzureRecordedTestCase, EnvironmentVariableLoader, recorded_by_proxy +from devtools_testutils import AzureRecordedTestCase, EnvironmentVariableLoader, recorded_by_proxy, is_live VoiceLivePreparer = functools.partial( EnvironmentVariableLoader, "voicelive", - voicelive_openai_endpoint=os.getenv("VOICELIVE_OPENAI_ENDPOINT", "fake_endpoint"), - voicelive_openai_api_key=os.getenv("VOICELIVE_OPENAI_API_KEY", "fake_api_key"), + voicelive_openai_endpoint="fake_endpoint", + voicelive_openai_api_key="fake_api_key", ) diff --git a/sdk/ai/ci.yml b/sdk/ai/ci.yml index 20ceb03e1897..d9adc932b832 100644 --- a/sdk/ai/ci.yml +++ b/sdk/ai/ci.yml @@ -35,7 +35,7 @@ extends: # The job "Test ubuntu2404_pypy39" in the "python - ai" pipeline hangs and eventually times out. # Disable it until the issue is understood. MatrixConfigs: - - Name: communication_ci_matrix + - Name: ai_ci_matrix Path: sdk/ai/platform-matrix.json Selection: sparse GenerateVMJobs: true