Skip to content

Commit c646e95

Browse files
committed
Support transcription for single agent live scenario
1 parent 2c75293 commit c646e95

File tree

2 files changed

+155
-4
lines changed

2 files changed

+155
-4
lines changed

src/google/adk/runners.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -716,12 +716,12 @@ def _new_invocation_context_for_live(
716716
live_request_queue: Optional[LiveRequestQueue] = None,
717717
run_config: Optional[RunConfig] = None,
718718
) -> InvocationContext:
719-
"""Creates a new invocation context for live multi-agent."""
719+
"""Creates a new invocation context for live single and multi-agent scenarios."""
720720
run_config = run_config or RunConfig()
721721

722722
# For live multi-agent, we need model's text transcription as context for
723-
# next agent.
724-
if self.agent.sub_agents and live_request_queue:
723+
# next agent. For single-agent, we need general transcription support.
724+
if live_request_queue:
725725
if not run_config.response_modalities:
726726
# default
727727
run_config.response_modalities = ['AUDIO']
@@ -735,7 +735,8 @@ def _new_invocation_context_for_live(
735735
types.AudioTranscriptionConfig()
736736
)
737737
if not run_config.input_audio_transcription:
738-
# need this input transcription for agent transferring in live mode.
738+
# need this input transcription for agent transferring in multi-agent live
739+
# mode and for general transcription support in single agent live mode.
739740
run_config.input_audio_transcription = types.AudioTranscriptionConfig()
740741
return self._new_invocation_context(
741742
session,

tests/unittests/streaming/test_live_streaming_configs.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,3 +586,153 @@ def test_streaming_with_session_resumption_config():
586586
llm_request_sent_to_mock.live_connect_config.session_resumption.transparent
587587
is True
588588
)
589+
590+
591+
def test_single_agent_live_streaming_with_transcription():
  """Checks that a lone agent gets transcription configs without asking.

  The run config is created with no transcription settings; the test then
  inspects the single request captured by the mock model and expects both
  the output and the input audio transcription configs to be populated.
  """
  model = testing_utils.MockModel.create([LlmResponse(turn_complete=True)])

  # A root agent with no sub-agents and no tools.
  agent = Agent(name='single_agent', model=model, tools=[])
  runner = testing_utils.InMemoryRunner(
      root_agent=agent, response_modalities=['AUDIO']
  )

  # Deliberately leave transcription unset on the run config.
  config = RunConfig()

  queue = LiveRequestQueue()
  audio_blob = types.Blob(data=b'\x00\xFF', mime_type='audio/pcm')
  queue.send_realtime(blob=audio_blob)

  events = runner.run_live(queue, config)

  assert events is not None, 'Expected a list of events, got None.'
  assert (
      len(events) >= 1
  ), 'Expected at least one response, but got an empty list.'
  assert len(model.requests) == 1

  # Inspect the live connect config of the one request the mock recorded.
  live_config = model.requests[0].live_connect_config
  assert live_config is not None
  assert live_config.output_audio_transcription is not None
  assert live_config.input_audio_transcription is not None
638+
639+
640+
def test_single_agent_live_streaming_respects_explicit_transcription():
  """Checks that caller-supplied transcription configs are passed through.

  Both transcription configs are set explicitly on the run config; the test
  expects the request captured by the mock model to carry those very same
  objects (identity comparison) rather than replacements.
  """
  model = testing_utils.MockModel.create([LlmResponse(turn_complete=True)])

  # A root agent with no sub-agents and no tools.
  agent = Agent(name='single_agent', model=model, tools=[])
  runner = testing_utils.InMemoryRunner(
      root_agent=agent, response_modalities=['AUDIO']
  )

  # Explicit transcription settings that must survive untouched.
  output_cfg = types.AudioTranscriptionConfig()
  input_cfg = types.AudioTranscriptionConfig()
  config = RunConfig(
      output_audio_transcription=output_cfg,
      input_audio_transcription=input_cfg,
  )

  queue = LiveRequestQueue()
  audio_blob = types.Blob(data=b'\x00\xFF', mime_type='audio/pcm')
  queue.send_realtime(blob=audio_blob)

  events = runner.run_live(queue, config)

  assert events is not None, 'Expected a list of events, got None.'
  assert (
      len(events) >= 1
  ), 'Expected at least one response, but got an empty list.'
  assert len(model.requests) == 1

  # Inspect the live connect config of the one request the mock recorded.
  live_config = model.requests[0].live_connect_config
  assert live_config is not None
  assert live_config.output_audio_transcription is output_cfg
  assert live_config.input_audio_transcription is input_cfg
693+
694+
695+
def test_single_agent_live_streaming_with_text_modality():
  """Checks input transcription when TEXT is among the response modalities.

  The run config requests both 'TEXT' and 'AUDIO'; the test only verifies
  that the request captured by the mock model has an input audio
  transcription config.
  """
  model = testing_utils.MockModel.create([LlmResponse(turn_complete=True)])

  # A root agent with no sub-agents and no tools.
  agent = Agent(name='single_agent', model=model, tools=[])
  runner = testing_utils.InMemoryRunner(root_agent=agent)

  config = RunConfig(response_modalities=['TEXT', 'AUDIO'])

  queue = LiveRequestQueue()
  audio_blob = types.Blob(data=b'\x00\xFF', mime_type='audio/pcm')
  queue.send_realtime(blob=audio_blob)

  events = runner.run_live(queue, config)

  assert events is not None, 'Expected a list of events, got None.'
  assert (
      len(events) >= 1
  ), 'Expected at least one response, but got an empty list.'
  assert len(model.requests) == 1

  # Inspect the live connect config of the one request the mock recorded.
  live_config = model.requests[0].live_connect_config
  assert live_config is not None
  assert live_config.input_audio_transcription is not None
737+
738+

0 commit comments

Comments
 (0)