Skip to content

Commit 5da15aa

Browse files
authored
Merge d01a970 into 0afe7cb
2 parents 0afe7cb + d01a970 commit 5da15aa

File tree

7 files changed

+242
-24
lines changed

7 files changed

+242
-24
lines changed

firebase-ai/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Unreleased
22

3+
- [changed] Added `LiveAudioConversationConfig` to control different aspects of the conversation
4+
while using the `startAudioConversation` function.
35
- [changed] Added better scheduling and louder output for Live API.
46
- [changed] Added support for input and output transcription. (#7482)
57
- [feature] Added support for sending realtime audio and video in a `LiveSession`.

firebase-ai/api.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ package com.google.firebase.ai.java {
152152
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendVideoRealtime(com.google.firebase.ai.type.InlineData video);
153153
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
154154
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
155+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig);
155156
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
156157
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
157158
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler, boolean enableInterruptions);
@@ -838,6 +839,31 @@ package com.google.firebase.ai.type {
838839
public final class InvalidStateException extends com.google.firebase.ai.type.FirebaseAIException {
839840
}
840841

842+
@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveAudioConversationConfig {
843+
field public static final com.google.firebase.ai.type.LiveAudioConversationConfig.Companion Companion;
844+
}
845+
846+
public static final class LiveAudioConversationConfig.Builder {
847+
ctor public LiveAudioConversationConfig.Builder();
848+
method public com.google.firebase.ai.type.LiveAudioConversationConfig build();
849+
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setAudioHandler(kotlin.jvm.functions.Function2<? super android.media.AudioRecord,? super android.media.AudioTrack,kotlin.Unit>? audioHandler);
850+
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setEnableInterruptions(boolean enableInterruptions);
851+
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setFunctionCallHandler(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
852+
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder setTranscriptHandler(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler);
853+
field public kotlin.jvm.functions.Function2<? super android.media.AudioRecord,? super android.media.AudioTrack,kotlin.Unit>? audioHandler;
854+
field public boolean enableInterruptions;
855+
field public kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler;
856+
field public kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler;
857+
}
858+
859+
public static final class LiveAudioConversationConfig.Companion {
860+
method public com.google.firebase.ai.type.LiveAudioConversationConfig.Builder builder();
861+
}
862+
863+
public final class LiveAudioConversationConfigKt {
864+
method public static com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.LiveAudioConversationConfig.Builder,kotlin.Unit> init);
865+
}
866+
841867
@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveGenerationConfig {
842868
field public static final com.google.firebase.ai.type.LiveGenerationConfig.Companion Companion;
843869
}
@@ -922,6 +948,7 @@ package com.google.firebase.ai.type {
922948
method @Deprecated public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
923949
method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
924950
method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
951+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(com.google.firebase.ai.type.LiveAudioConversationConfig liveAudioConversationConfig, kotlin.coroutines.Continuation<? super kotlin.Unit>);
925952
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
926953
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
927954
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);

firebase-ai/gradle.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
version=17.5.0
15+
version=99.9.9
1616
latestReleasedVersion=17.4.0

firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import com.google.firebase.ai.type.Content
2424
import com.google.firebase.ai.type.FunctionCallPart
2525
import com.google.firebase.ai.type.FunctionResponsePart
2626
import com.google.firebase.ai.type.InlineData
27+
import com.google.firebase.ai.type.LiveAudioConversationConfig
2728
import com.google.firebase.ai.type.LiveServerMessage
2829
import com.google.firebase.ai.type.LiveSession
2930
import com.google.firebase.ai.type.MediaData
@@ -49,6 +50,18 @@ public abstract class LiveSessionFutures internal constructor() {
4950
@RequiresPermission(RECORD_AUDIO)
5051
public abstract fun startAudioConversation(): ListenableFuture<Unit>
5152

53+
/**
54+
* Starts an audio conversation with the model, which can only be stopped using
55+
* [stopAudioConversation].
56+
*
57+
* @param liveAudioConversationConfig A [LiveAudioConversationConfig] provided by the user to
58+
* control the various aspects of the conversation.
59+
*/
60+
@RequiresPermission(RECORD_AUDIO)
61+
public abstract fun startAudioConversation(
62+
liveAudioConversationConfig: LiveAudioConversationConfig
63+
): ListenableFuture<Unit>
64+
5265
/**
5366
* Starts an audio conversation with the model, which can only be stopped using
5467
* [stopAudioConversation] or [close].
@@ -298,6 +311,12 @@ public abstract class LiveSessionFutures internal constructor() {
298311
session.startAudioConversation(transcriptHandler = transcriptHandler)
299312
}
300313

314+
@RequiresPermission(RECORD_AUDIO)
315+
override fun startAudioConversation(liveAudioConversationConfig: LiveAudioConversationConfig) =
316+
SuspendToFutureAdapter.launchFuture {
317+
session.startAudioConversation(liveAudioConversationConfig)
318+
}
319+
301320
@RequiresPermission(RECORD_AUDIO)
302321
override fun startAudioConversation() =
303322
SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioHelper.kt

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package com.google.firebase.ai.type
1919
import android.Manifest
2020
import android.media.AudioAttributes
2121
import android.media.AudioFormat
22-
import android.media.AudioManager
2322
import android.media.AudioRecord
2423
import android.media.AudioTrack
2524
import android.media.MediaRecorder
@@ -157,28 +156,39 @@ internal class AudioHelper(
157156
*
158157
* It also makes it easier to read, since the long initialization is separate from the
159158
* constructor.
159+
*
160+
* @param audioHandler A callback that is invoked immediately following the successful
161+
* initialization of the associated [AudioRecord] and [AudioTrack] objects. This offers a final
162+
* opportunity to configure these objects, which will remain valid and effective for the
163+
* duration of the current audio session.
160164
*/
161165
@RequiresPermission(Manifest.permission.RECORD_AUDIO)
162-
fun build(): AudioHelper {
163-
val playbackTrack =
164-
AudioTrack(
165-
AudioAttributes.Builder()
166-
.setUsage(AudioAttributes.USAGE_MEDIA)
167-
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
168-
.build(),
166+
fun build(
167+
audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)? = null
168+
): AudioHelper {
169+
val playTrackBuilder = AudioTrack.Builder()
170+
playTrackBuilder
171+
.setAudioFormat(
169172
AudioFormat.Builder()
170173
.setSampleRate(24000)
171174
.setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
172175
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
173-
.build(),
176+
.build()
177+
)
178+
.setAudioAttributes(
179+
AudioAttributes.Builder()
180+
.setUsage(AudioAttributes.USAGE_MEDIA)
181+
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
182+
.build()
183+
)
184+
.setBufferSizeInBytes(
174185
AudioTrack.getMinBufferSize(
175186
24000,
176187
AudioFormat.CHANNEL_OUT_MONO,
177188
AudioFormat.ENCODING_PCM_16BIT
178189
),
179-
AudioTrack.MODE_STREAM,
180-
AudioManager.AUDIO_SESSION_ID_GENERATE
181190
)
191+
.setTransferMode(AudioTrack.MODE_STREAM)
182192

183193
val bufferSize =
184194
AudioRecord.getMinBufferSize(
@@ -191,15 +201,22 @@ internal class AudioHelper(
191201
throw AudioRecordInitializationFailedException(
192202
"Audio Record buffer size is invalid ($bufferSize)"
193203
)
194-
195-
val recorder =
196-
AudioRecord(
197-
MediaRecorder.AudioSource.VOICE_COMMUNICATION,
198-
16000,
199-
AudioFormat.CHANNEL_IN_MONO,
200-
AudioFormat.ENCODING_PCM_16BIT,
201-
bufferSize
202-
)
204+
val recorderBuilder =
205+
AudioRecord.Builder()
206+
.setAudioSource(MediaRecorder.AudioSource.VOICE_COMMUNICATION)
207+
.setAudioFormat(
208+
AudioFormat.Builder()
209+
.setEncoding(AudioFormat.ENCODING_PCM_16BIT)
210+
.setSampleRate(16000)
211+
.setChannelMask(AudioFormat.CHANNEL_IN_MONO)
212+
.build()
213+
)
214+
.setBufferSizeInBytes(bufferSize)
215+
if (audioHandler != null) {
216+
audioHandler(recorderBuilder, playTrackBuilder)
217+
}
218+
val recorder = recorderBuilder.build()
219+
val playbackTrack = playTrackBuilder.build()
203220
if (recorder.state != AudioRecord.STATE_INITIALIZED)
204221
throw AudioRecordInitializationFailedException(
205222
"Audio Record initialization has failed. State: ${recorder.state}"
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Copyright 2025 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.firebase.ai.type
18+
19+
import android.media.AudioRecord
20+
import android.media.AudioTrack
21+
22+
/**
23+
* Configuration parameters to use for conversation config.
24+
*
25+
* @property functionCallHandler A callback that is invoked whenever the model receives a function
26+
* call. The [FunctionResponsePart] that the callback function returns will be automatically sent to
27+
* the model.
28+
*
29+
* @property transcriptHandler A callback that is invoked whenever the model receives a transcript.
30+
* The first [Transcription] object is the input transcription, and the second is the output
31+
* transcription.
32+
*
33+
* @property audioHandler A callback that is invoked immediately following the successful
34+
* initialization of the associated [AudioRecord.Builder] and [AudioTrack.Builder] objects. This
35+
* offers a final opportunity to configure these objects, which will remain valid and effective for
36+
* the duration of the current audio session.
37+
*
38+
* @property enableInterruptions If enabled, allows the user to speak over or interrupt the model's
39+
* ongoing reply.
40+
*
41+
* **WARNING**: The user interruption feature relies on device-specific support, and may not be
42+
* consistently available.
43+
*/
44+
@PublicPreviewAPI
45+
public class LiveAudioConversationConfig
46+
private constructor(
47+
internal val functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
48+
internal val audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)?,
49+
internal val transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
50+
internal val enableInterruptions: Boolean
51+
) {
52+
53+
/**
54+
* Builder for creating a [LiveAudioConversationConfig].
55+
*
56+
* Mainly intended for Java interop. Kotlin consumers should use [liveAudioConversationConfig] for
57+
* a more idiomatic experience.
58+
*
59+
* @property functionCallHandler See [LiveAudioConversationConfig.functionCallHandler].
60+
*
61+
* @property audioHandler See [LiveAudioConversationConfig.audioHandler].
62+
*
63+
* @property transcriptHandler See [LiveAudioConversationConfig.transcriptHandler].
64+
*
65+
* @property enableInterruptions See [LiveAudioConversationConfig.enableInterruptions].
66+
*/
67+
public class Builder {
68+
@JvmField public var functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null
69+
@JvmField public var audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)? = null
70+
@JvmField public var transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null
71+
@JvmField public var enableInterruptions: Boolean = false
72+
73+
public fun setFunctionCallHandler(
74+
functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
75+
): Builder = apply { this.functionCallHandler = functionCallHandler }
76+
77+
public fun setAudioHandler(
78+
audioHandler: ((AudioRecord.Builder, AudioTrack.Builder) -> Unit)?
79+
): Builder = apply { this.audioHandler = audioHandler }
80+
81+
public fun setTranscriptHandler(
82+
transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
83+
): Builder = apply { this.transcriptHandler = transcriptHandler }
84+
85+
public fun setEnableInterruptions(enableInterruptions: Boolean): Builder = apply {
86+
this.enableInterruptions = enableInterruptions
87+
}
88+
89+
/** Create a new [LiveAudioConversationConfig] with the attached arguments. */
90+
public fun build(): LiveAudioConversationConfig =
91+
LiveAudioConversationConfig(
92+
functionCallHandler = functionCallHandler,
93+
audioHandler = audioHandler,
94+
transcriptHandler = transcriptHandler,
95+
enableInterruptions = enableInterruptions
96+
)
97+
}
98+
99+
public companion object {
100+
101+
/**
102+
* Alternative casing for [LiveAudioConversationConfig.Builder]:
103+
* ```
104+
* val config = LiveAudioConversationConfig.builder()
105+
* ```
106+
*/
107+
public fun builder(): Builder = Builder()
108+
}
109+
}
110+
111+
/**
112+
* Helper method to construct a [LiveAudioConversationConfig] in a DSL-like manner.
113+
*
114+
* Example Usage:
115+
* ```
116+
* liveAudioConversationConfig {
117+
* functionCallHandler = ...
118+
* audioHandler = ...
119+
* ...
120+
* }
121+
* ```
122+
*/
123+
@OptIn(PublicPreviewAPI::class)
124+
public fun liveAudioConversationConfig(
125+
init: LiveAudioConversationConfig.Builder.() -> Unit
126+
): LiveAudioConversationConfig {
127+
val builder = LiveAudioConversationConfig.builder()
128+
builder.init()
129+
return builder.build()
130+
}

firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,26 @@ internal constructor(
171171
transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null,
172172
enableInterruptions: Boolean = false,
173173
) {
174+
startAudioConversation(
175+
liveAudioConversationConfig {
176+
this.functionCallHandler = functionCallHandler
177+
this.transcriptHandler = transcriptHandler
178+
this.enableInterruptions = enableInterruptions
179+
}
180+
)
181+
}
182+
183+
/**
184+
* Starts an audio conversation with the model, which can only be stopped using
185+
* [stopAudioConversation] or [close].
186+
*
187+
* @param liveAudioConversationConfig A [LiveAudioConversationConfig] provided by the user to
188+
* control the various aspects of the conversation.
189+
*/
190+
@RequiresPermission(RECORD_AUDIO)
191+
public suspend fun startAudioConversation(
192+
liveAudioConversationConfig: LiveAudioConversationConfig
193+
) {
174194

175195
val context = firebaseApp.applicationContext
176196
if (
@@ -191,11 +211,14 @@ internal constructor(
191211
networkScope =
192212
CoroutineScope(blockingDispatcher + childJob() + CoroutineName("LiveSession Network"))
193213
audioScope = CoroutineScope(audioDispatcher + childJob() + CoroutineName("LiveSession Audio"))
194-
audioHelper = AudioHelper.build()
214+
audioHelper = AudioHelper.build(liveAudioConversationConfig.audioHandler)
195215

196216
recordUserAudio()
197-
processModelResponses(functionCallHandler, transcriptHandler)
198-
listenForModelPlayback(enableInterruptions)
217+
processModelResponses(
218+
liveAudioConversationConfig.functionCallHandler,
219+
liveAudioConversationConfig.transcriptHandler
220+
)
221+
listenForModelPlayback(liveAudioConversationConfig.enableInterruptions)
199222
}
200223
}
201224

0 commit comments

Comments (0)