Skip to content

Commit 50d7733

Browse files
committed
feat: Add transcription support for single-agent live scenarios
1 parent 955632c commit 50d7733

File tree

2 files changed

+105
-4
lines changed

2 files changed

+105
-4
lines changed

src/google/adk/runners.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,12 +1104,12 @@ def _new_invocation_context_for_live(
11041104
live_request_queue: Optional[LiveRequestQueue] = None,
11051105
run_config: Optional[RunConfig] = None,
11061106
) -> InvocationContext:
1107-
"""Creates a new invocation context for live multi-agent."""
1107+
"""Creates a new invocation context for live single- and multi-agent scenarios."""
11081108
run_config = run_config or RunConfig()
11091109

11101110
# For live multi-agent, we need model's text transcription as context for
1111-
# next agent.
1112-
if self.agent.sub_agents and live_request_queue:
1111+
# next agent. For single-agent, we need general transcription support.
1112+
if live_request_queue:
11131113
if not run_config.response_modalities:
11141114
# default
11151115
run_config.response_modalities = ['AUDIO']
@@ -1123,7 +1123,8 @@ def _new_invocation_context_for_live(
11231123
types.AudioTranscriptionConfig()
11241124
)
11251125
if not run_config.input_audio_transcription:
1126-
# need this input transcription for agent transferring in live mode.
1126+
# need this input transcription for agent transferring in multi-agent live
1127+
# mode and for general transcription support in single agent live mode.
11271128
run_config.input_audio_transcription = types.AudioTranscriptionConfig()
11281129
return self._new_invocation_context(
11291130
session,

tests/unittests/streaming/test_live_streaming_configs.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,3 +642,103 @@ def test_streaming_with_context_window_compression_config():
642642
llm_request_sent_to_mock.live_connect_config.context_window_compression.sliding_window.target_tokens
643643
== 500
644644
)
645+
646+
647+
def test_single_agent_live_streaming_with_transcription():
648+
"""Test single-agent streaming adds transcription configs when not provided."""
649+
response1 = LlmResponse(
650+
turn_complete=True,
651+
)
652+
653+
mock_model = testing_utils.MockModel.create([response1])
654+
655+
root_agent = Agent(
656+
name='single_agent',
657+
model=mock_model,
658+
tools=[],
659+
)
660+
661+
runner = testing_utils.InMemoryRunner(root_agent=root_agent)
662+
663+
# Test without passing any run_config to verify default behavior
664+
# The logic in _new_invocation_context_for_live should automatically add
665+
# transcription configs for live streaming
666+
live_request_queue = LiveRequestQueue()
667+
live_request_queue.send_realtime(
668+
blob=types.Blob(data=b'\x00\xFF', mime_type='audio/pcm')
669+
)
670+
671+
res_events = runner.run_live(live_request_queue)
672+
673+
assert res_events is not None, 'Expected a list of events, got None.'
674+
assert (
675+
len(res_events) > 0
676+
), 'Expected at least one response, but got an empty list.'
677+
assert len(mock_model.requests) == 1
678+
679+
# Get the request that was captured
680+
llm_request_sent_to_mock = mock_model.requests[0]
681+
682+
# Assert that transcription configs were added
683+
assert llm_request_sent_to_mock.live_connect_config is not None
684+
assert (
685+
llm_request_sent_to_mock.live_connect_config.output_audio_transcription
686+
is not None
687+
)
688+
assert (
689+
llm_request_sent_to_mock.live_connect_config.input_audio_transcription
690+
is not None
691+
)
692+
693+
694+
def test_single_agent_live_streaming_respects_explicit_transcription():
695+
"""Test that single-agent live streaming respects explicitly provided transcription configs."""
696+
response1 = LlmResponse(
697+
turn_complete=True,
698+
)
699+
700+
mock_model = testing_utils.MockModel.create([response1])
701+
702+
# Create a single agent (no sub_agents)
703+
root_agent = Agent(
704+
name='single_agent',
705+
model=mock_model,
706+
tools=[],
707+
)
708+
709+
runner = testing_utils.InMemoryRunner(root_agent=root_agent)
710+
711+
# Create run config with input and output audio transcription
712+
explicit_output_config = types.AudioTranscriptionConfig()
713+
explicit_input_config = types.AudioTranscriptionConfig()
714+
run_config = RunConfig(
715+
output_audio_transcription=explicit_output_config,
716+
input_audio_transcription=explicit_input_config,
717+
)
718+
719+
live_request_queue = LiveRequestQueue()
720+
live_request_queue.send_realtime(
721+
blob=types.Blob(data=b'\x00\xFF', mime_type='audio/pcm')
722+
)
723+
724+
res_events = runner.run_live(live_request_queue, run_config)
725+
726+
assert res_events is not None, 'Expected a list of events, got None.'
727+
assert (
728+
len(res_events) > 0
729+
), 'Expected at least one response, but got an empty list.'
730+
assert len(mock_model.requests) == 1
731+
732+
# Get the request that was captured
733+
llm_request_sent_to_mock = mock_model.requests[0]
734+
735+
# Assert that the explicit configs were used
736+
assert llm_request_sent_to_mock.live_connect_config is not None
737+
assert (
738+
llm_request_sent_to_mock.live_connect_config.output_audio_transcription
739+
is explicit_output_config
740+
)
741+
assert (
742+
llm_request_sent_to_mock.live_connect_config.input_audio_transcription
743+
is explicit_input_config
744+
)

0 commit comments

Comments
 (0)