From fa6fcf47966ad0af636f18f62a13cf404bbb92e6 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 11:58:49 -0500 Subject: [PATCH 01/40] Temp Stash --- .../firebase-vertexai.gradle.kts | 9 + .../firebase/vertexai/LiveGenerativeModel.kt | 4 +- .../firebase/vertexai/common/APIController.kt | 3 +- .../vertexai/java/LiveSessionFutures.kt | 18 +- .../firebase/vertexai/type/AudioHelper.kt | 198 +++--- ...entClientMessage.kt => LiveClientSetup.kt} | 16 +- .../vertexai/type/LiveGenerationConfig.kt | 2 +- .../firebase/vertexai/type/LiveSession.kt | 632 +++++++++++------- .../com/google/firebase/vertexai/type/Part.kt | 29 +- .../firebase/vertexai/LiveModelTests.kt | 183 +++++ .../google/firebase/vertexai/util/tests.kt | 7 +- .../firebase/vertexai/JavaCompileTests.java | 2 +- 12 files changed, 741 insertions(+), 362 deletions(-) rename firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/{BidiGenerateContentClientMessage.kt => LiveClientSetup.kt} (71%) create mode 100644 firebase-vertexai/src/test/java/com/google/firebase/vertexai/LiveModelTests.kt diff --git a/firebase-vertexai/firebase-vertexai.gradle.kts b/firebase-vertexai/firebase-vertexai.gradle.kts index f728e905cbb..c1c5a537956 100644 --- a/firebase-vertexai/firebase-vertexai.gradle.kts +++ b/firebase-vertexai/firebase-vertexai.gradle.kts @@ -121,6 +121,15 @@ dependencies { testImplementation(libs.robolectric) testImplementation(libs.truth) testImplementation(libs.mockito.core) + testImplementation("com.squareup.okhttp3:mockwebserver:4.12.0") + testImplementation(project(":integ-testing")) + testImplementation("io.ktor:ktor-server-websockets:2.3.2") { + // only needed for UI tests. Junit this could be ignored. 
+ exclude("org.eclipse.jetty") + } + testImplementation("io.ktor:ktor-server-test-host:2.3.2") { + exclude("org.eclipse.jetty") + } androidTestImplementation(libs.androidx.espresso.core) androidTestImplementation(libs.androidx.test.junit) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt index e557b694620..c83b41bfa47 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt @@ -22,8 +22,8 @@ import com.google.firebase.appcheck.interop.InteropAppCheckTokenProvider import com.google.firebase.auth.internal.InternalAuthProvider import com.google.firebase.vertexai.common.APIController import com.google.firebase.vertexai.common.AppCheckHeaderProvider -import com.google.firebase.vertexai.type.BidiGenerateContentClientMessage import com.google.firebase.vertexai.type.Content +import com.google.firebase.vertexai.type.LiveClientSetup import com.google.firebase.vertexai.type.LiveGenerationConfig import com.google.firebase.vertexai.type.LiveSession import com.google.firebase.vertexai.type.PublicPreviewAPI @@ -93,7 +93,7 @@ internal constructor( @OptIn(ExperimentalSerializationApi::class) public suspend fun connect(): LiveSession { val clientMessage = - BidiGenerateContentClientMessage( + LiveClientSetup( modelName, config?.toInternal(), tools?.map { it.toInternal() }, diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/APIController.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/APIController.kt index da580429f8c..d5c538a1ef8 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/APIController.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/APIController.kt @@ -160,11 +160,12 @@ internal 
constructor( throw FirebaseCommonAIException.from(e) } - private fun getBidiEndpoint(location: String): String = + internal fun getBidiEndpoint(location: String): String = "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/$location?key=$key" suspend fun getWebSocketSession(location: String): ClientWebSocketSession = client.webSocketSession(getBidiEndpoint(location)) + fun generateContentStream( request: GenerateContentRequest ): Flow = diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt index 044f83e8cc1..77291b864c7 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt @@ -16,19 +16,22 @@ package com.google.firebase.vertexai.java +import android.Manifest.permission.RECORD_AUDIO +import androidx.annotation.RequiresPermission import androidx.concurrent.futures.SuspendToFutureAdapter import com.google.common.util.concurrent.ListenableFuture import com.google.firebase.vertexai.type.Content import com.google.firebase.vertexai.type.FunctionCallPart import com.google.firebase.vertexai.type.FunctionResponsePart +import com.google.firebase.vertexai.type.InlineDataPart import com.google.firebase.vertexai.type.LiveContentResponse import com.google.firebase.vertexai.type.LiveSession -import com.google.firebase.vertexai.type.MediaData import com.google.firebase.vertexai.type.PublicPreviewAPI import com.google.firebase.vertexai.type.SessionAlreadyReceivingException import kotlinx.coroutines.reactive.asPublisher import org.reactivestreams.Publisher +// TODO(daymxn): Make sure the javadocs here match the kotlin ones /** * Wrapper class providing Java compatible methods for [LiveSession]. 
* @@ -52,13 +55,9 @@ public abstract class LiveSessionFutures internal constructor() { * Stops the audio conversation with the Gemini Server. * * @see [startAudioConversation] - * @see [stopReceiving] */ public abstract fun stopAudioConversation(): ListenableFuture - /** Stop receiving from the server. */ - public abstract fun stopReceiving() - /** * Sends the function response from the client to the server. * @@ -72,9 +71,9 @@ public abstract class LiveSessionFutures internal constructor() { /** * Streams client data to the server. * - * @param mediaChunks The list of [MediaData] instances representing the media data to be sent. + * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be sent. */ - public abstract fun sendMediaStream(mediaChunks: List): ListenableFuture + public abstract fun sendMediaStream(mediaChunks: List): ListenableFuture /** * Sends [data][Content] to the server. @@ -117,17 +116,16 @@ public abstract class LiveSessionFutures internal constructor() { override fun sendFunctionResponse(functionList: List) = SuspendToFutureAdapter.launchFuture { session.sendFunctionResponse(functionList) } - override fun sendMediaStream(mediaChunks: List) = + override fun sendMediaStream(mediaChunks: List) = SuspendToFutureAdapter.launchFuture { session.sendMediaStream(mediaChunks) } + @RequiresPermission(RECORD_AUDIO) override fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? 
) = SuspendToFutureAdapter.launchFuture { session.startAudioConversation(functionCallHandler) } override fun stopAudioConversation() = SuspendToFutureAdapter.launchFuture { session.stopAudioConversation() } - - override fun stopReceiving() = session.stopReceiving() } public companion object { diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt index 07219617cee..c7205042dba 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt @@ -17,6 +17,7 @@ package com.google.firebase.vertexai.type import android.Manifest +import android.media.AudioAttributes import android.media.AudioFormat import android.media.AudioManager import android.media.AudioRecord @@ -24,118 +25,127 @@ import android.media.AudioTrack import android.media.MediaRecorder import android.media.audiofx.AcousticEchoCanceler import androidx.annotation.RequiresPermission +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.emptyFlow import kotlinx.coroutines.flow.flow @PublicPreviewAPI -internal class AudioHelper { +internal class AudioHelper( + // Record for recording the user's mic + private val recorder: AudioRecord, + // Track for playing back what the model says + private val playbackTrack: AudioTrack, +) { + private var released: Boolean = false - private lateinit var audioRecord: AudioRecord - private lateinit var audioTrack: AudioTrack - private var stopRecording: Boolean = false + fun release() { + if (released) return + released = true - internal fun release() { - stopRecording = true - if (::audioRecord.isInitialized) { - audioRecord.stop() - audioRecord.release() - } - if (::audioTrack.isInitialized) { - audioTrack.stop() - audioTrack.release() - } + recorder.release() + playbackTrack.release() } 
- internal fun setupAudioTrack() { - audioTrack = - AudioTrack( - AudioManager.STREAM_MUSIC, - 24000, - AudioFormat.CHANNEL_OUT_MONO, - AudioFormat.ENCODING_PCM_16BIT, - AudioTrack.getMinBufferSize( - 24000, - AudioFormat.CHANNEL_OUT_MONO, - AudioFormat.ENCODING_PCM_16BIT - ), - AudioTrack.MODE_STREAM - ) - audioTrack.play() + fun playAudio(data: ByteArray) { + if (released) return + + playbackTrack.write(data, 0, data.size) } - internal fun playAudio(data: ByteArray) { - if (!stopRecording) { - audioTrack.write(data, 0, data.size) - } + fun pauseRecording() { + if (released || recorder.state == AudioRecord.RECORDSTATE_STOPPED) return + + recorder.stop() } - fun stopRecording() { - if ( - ::audioRecord.isInitialized && audioRecord.recordingState == AudioRecord.RECORDSTATE_RECORDING - ) { - audioRecord.stop() - } + fun resumeRecording() { + if (released || recorder.state == AudioRecord.RECORDSTATE_RECORDING) return + + recorder.startRecording() } - fun start() { - if ( - ::audioRecord.isInitialized && audioRecord.recordingState != AudioRecord.RECORDSTATE_RECORDING - ) { - audioRecord.startRecording() - } + fun listenToRecording(): Flow { + if (released) return emptyFlow() + + resumeRecording() + + return recorder.readAsFlow() } - @RequiresPermission(Manifest.permission.RECORD_AUDIO) - fun startRecording(): Flow { - - val bufferSize = - AudioRecord.getMinBufferSize( - 16000, - AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT - ) - if ( - bufferSize == AudioRecord.ERROR || - bufferSize == AudioRecord.ERROR_BAD_VALUE || - bufferSize <= 0 - ) { - throw AudioRecordInitializationFailedException( - "Audio Record buffer size is invalid (${bufferSize})" - ) - } - audioRecord = - AudioRecord( - MediaRecorder.AudioSource.VOICE_COMMUNICATION, - 16000, - AudioFormat.CHANNEL_IN_MONO, - AudioFormat.ENCODING_PCM_16BIT, - bufferSize - ) - if (audioRecord.state != AudioRecord.STATE_INITIALIZED) { - throw AudioRecordInitializationFailedException( - "Audio Record 
initialization has failed. State: ${audioRecord.state}" - ) + + companion object { + @RequiresPermission(Manifest.permission.RECORD_AUDIO) + fun Build(): AudioHelper { + val playbackTrack = + AudioTrack( + AudioAttributes.Builder().setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION).build(), + AudioFormat.Builder() + .setSampleRate(24000) + .setChannelMask(AudioFormat.CHANNEL_OUT_MONO) + .setEncoding(AudioFormat.ENCODING_PCM_16BIT) + .build(), + AudioTrack.getMinBufferSize( + 24000, + AudioFormat.CHANNEL_OUT_MONO, + AudioFormat.ENCODING_PCM_16BIT + ), + AudioTrack.MODE_STREAM, + AudioManager.AUDIO_SESSION_ID_GENERATE + ) + + playbackTrack.play() + + val bufferSize = + AudioRecord.getMinBufferSize( + 16000, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT + ) + + if (bufferSize <= 0) + throw AudioRecordInitializationFailedException( + "Audio Record buffer size is invalid ($bufferSize)" + ) + + val recorder = + AudioRecord( + MediaRecorder.AudioSource.VOICE_COMMUNICATION, + 16000, + AudioFormat.CHANNEL_IN_MONO, + AudioFormat.ENCODING_PCM_16BIT, + bufferSize + ) + if (recorder.state != AudioRecord.STATE_INITIALIZED) + throw AudioRecordInitializationFailedException( + "Audio Record initialization has failed. 
State: ${recorder.state}" + ) + + if (AcousticEchoCanceler.isAvailable()) { + AcousticEchoCanceler.create(recorder.audioSessionId)?.enabled = true + } + + return AudioHelper(recorder, playbackTrack) } - if (AcousticEchoCanceler.isAvailable()) { - val echoCanceler = AcousticEchoCanceler.create(audioRecord.audioSessionId) - echoCanceler?.enabled = true + } +} + +internal val AudioRecord.minBufferSize: Int + get() = AudioRecord.getMinBufferSize(sampleRate, channelConfiguration, audioFormat) + +internal fun AudioRecord.readAsFlow() = flow { + val buffer = ByteArray(minBufferSize) + + while (true) { + if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { + delay(1) + continue } - audioRecord.startRecording() - - return flow { - val buffer = ByteArray(bufferSize) - while (!stopRecording) { - if (audioRecord.recordingState != AudioRecord.RECORDSTATE_RECORDING) { - buffer.fill(0x00) - continue - } - try { - val bytesRead = audioRecord.read(buffer, 0, buffer.size) - if (bytesRead > 0) { - emit(buffer.copyOf(bytesRead)) - } - } catch (_: Exception) {} - } + val bytesRead = read(buffer, 0, buffer.size) + if (bytesRead > 0) { + emit(buffer.copyOf(bytesRead)) + } else { + delay(1) } } } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/BidiGenerateContentClientMessage.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetup.kt similarity index 71% rename from firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/BidiGenerateContentClientMessage.kt rename to firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetup.kt index 5488cb240f5..1c626f5964d 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/BidiGenerateContentClientMessage.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetup.kt @@ -19,19 +19,25 @@ package com.google.firebase.vertexai.type import kotlinx.serialization.ExperimentalSerializationApi 
import kotlinx.serialization.Serializable +/** + * First message in a live session. + * + * Contains configuration that will be used for the duration of the session. + */ @OptIn(ExperimentalSerializationApi::class) @PublicPreviewAPI -internal class BidiGenerateContentClientMessage( +internal class LiveClientSetup( val model: String, + // Some config options are supported in generateContent but not in bidi and vice versa; so bidi + // needs its own config class val generationConfig: LiveGenerationConfig.Internal?, val tools: List?, val systemInstruction: Content.Internal? ) { - @Serializable - internal class Internal(val setup: BidiGenerateContentSetup) { + internal class Internal(val setup: LiveClientSetup) { @Serializable - internal data class BidiGenerateContentSetup( + internal data class LiveClientSetup( val model: String, val generationConfig: LiveGenerationConfig.Internal?, val tools: List?, @@ -40,5 +46,5 @@ internal class BidiGenerateContentClientMessage( } fun toInternal() = - Internal(Internal.BidiGenerateContentSetup(model, generationConfig, tools, systemInstruction)) + Internal(Internal.LiveClientSetup(model, generationConfig, tools, systemInstruction)) } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveGenerationConfig.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveGenerationConfig.kt index 55e789fd14f..849882977d1 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveGenerationConfig.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveGenerationConfig.kt @@ -20,7 +20,7 @@ import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable /** - * Configuration parameters to use for content generation. + * Configuration parameters to use for live content generation. * * @property temperature A parameter controlling the degree of randomness in token selection. 
A * temperature of 0 means that the highest probability tokens are always selected. In this case, diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index 7e14a80dfcc..bdbf1f48f50 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -16,183 +16,85 @@ package com.google.firebase.vertexai.type +import android.Manifest.permission.RECORD_AUDIO import android.media.AudioFormat import android.media.AudioTrack import android.util.Log -import com.google.firebase.annotations.concurrent.Background -import com.google.firebase.vertexai.LiveGenerativeModel +import androidx.annotation.RequiresPermission +import com.google.firebase.annotations.concurrent.Blocking +import com.google.firebase.vertexai.common.JSON import io.ktor.client.plugins.websocket.ClientWebSocketSession import io.ktor.websocket.Frame import io.ktor.websocket.close import io.ktor.websocket.readBytes -import java.io.ByteArrayOutputStream import java.util.concurrent.ConcurrentLinkedQueue +import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext +import kotlin.coroutines.EmptyCoroutineContext import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.DelicateCoroutinesApi +import kotlinx.coroutines.Job import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel +import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED +import kotlinx.coroutines.currentCoroutineContext +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.buffer import kotlinx.coroutines.flow.flow +import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.flow.receiveAsFlow +import kotlinx.coroutines.flow.transform +import 
kotlinx.coroutines.isActive import kotlinx.coroutines.launch +import kotlinx.coroutines.yield import kotlinx.serialization.ExperimentalSerializationApi -import kotlinx.serialization.SerialName import kotlinx.serialization.Serializable import kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json import kotlinx.serialization.json.JsonNull +import kotlinx.serialization.json.JsonObject +import kotlinx.serialization.json.decodeFromJsonElement + +// TODO: maybe dont use a bytearray, maybe use a channel or list to avoid accidental overflow +internal fun Flow.accumulateUntil( + minSize: Int, + emitLeftOvers: Boolean = false +): Flow = flow { + var offset = 0 + val audioBuffer = ByteArray(minSize * 2) + + collect { + it.copyInto(audioBuffer, offset) + offset += it.size + if (offset >= minSize) { + emit(audioBuffer.clone()) + audioBuffer.fill(0) + offset = 0 + } + } + // Emit any leftover bytes (optional) + if (emitLeftOvers && offset > 0) { + emit(audioBuffer.clone()) + } +} /** Represents a live WebSocket session capable of streaming content to and from the server. */ @PublicPreviewAPI @OptIn(ExperimentalSerializationApi::class) public class LiveSession internal constructor( - private val session: ClientWebSocketSession?, - @Background private val backgroundDispatcher: CoroutineContext, + private val session: ClientWebSocketSession, + @Blocking private val backgroundDispatcher: CoroutineContext, + // TODO: might need to be AtomicRef private var audioHelper: AudioHelper? 
= null ) { + private val flow = session.incoming.receiveAsFlow() + private var scope = CancelledCoroutineScope - private val audioQueue = ConcurrentLinkedQueue() private val playBackQueue = ConcurrentLinkedQueue() - private var startedReceiving = false - private var receiveChannel: Channel = Channel() - private var isRecording: Boolean = false - - private companion object { - val TAG = LiveSession::class.java.simpleName - val MIN_BUFFER_SIZE = - AudioTrack.getMinBufferSize( - 24000, - AudioFormat.CHANNEL_OUT_MONO, - AudioFormat.ENCODING_PCM_16BIT - ) - } - - internal class ClientContentSetup(val turns: List, val turnComplete: Boolean) { - @Serializable - internal class Internal(@SerialName("client_content") val clientContent: ClientContent) { - @Serializable - internal data class ClientContent( - val turns: List, - @SerialName("turn_complete") val turnComplete: Boolean - ) - } - - fun toInternal() = Internal(Internal.ClientContent(turns, turnComplete)) - } - - @OptIn(ExperimentalSerializationApi::class) - internal class ToolResponseSetup( - val functionResponses: List - ) { - - @Serializable - internal data class Internal(val toolResponse: ToolResponse) { - @Serializable - internal data class ToolResponse( - val functionResponses: List - ) - } - - fun toInternal() = Internal(Internal.ToolResponse(functionResponses)) - } - - internal class ServerContentSetup(val modelTurn: Content.Internal) { - @Serializable - internal class Internal(@SerialName("serverContent") val serverContent: ServerContent) { - @Serializable - internal data class ServerContent(@SerialName("modelTurn") val modelTurn: Content.Internal) - } - - fun toInternal() = Internal(Internal.ServerContent(modelTurn)) - } - - internal class MediaStreamingSetup(val mediaChunks: List) { - @Serializable - internal class Internal(val realtimeInput: MediaChunks) { - @Serializable internal data class MediaChunks(val mediaChunks: List) - } - fun toInternal() = Internal(Internal.MediaChunks(mediaChunks)) - } - - 
internal data class ToolCallSetup( - val functionCalls: List - ) { - - @Serializable - internal class Internal(val toolCall: ToolCall) { - - @Serializable - internal data class ToolCall(val functionCalls: List) - } - - fun toInternal(): Internal { - return Internal(Internal.ToolCall(functionCalls)) - } - } - - private fun fillRecordedAudioQueue() { - CoroutineScope(backgroundDispatcher).launch { - audioHelper!!.startRecording().collect { - if (!isRecording) { - cancel() - } - audioQueue.add(it) - } - } - } - - private suspend fun sendAudioDataToServer() { - - val audioBufferStream = ByteArrayOutputStream() - while (isRecording) { - val receivedAudio = audioQueue.poll() ?: continue - audioBufferStream.write(receivedAudio) - if (audioBufferStream.size() >= MIN_BUFFER_SIZE) { - sendMediaStream(listOf(MediaData(audioBufferStream.toByteArray(), "audio/pcm"))) - audioBufferStream.reset() - } - } - } - - private fun fillServerResponseAudioQueue( - functionCallsHandler: ((FunctionCallPart) -> FunctionResponsePart)? 
= null - ) { - CoroutineScope(backgroundDispatcher).launch { - receive().collect { - if (!isRecording) { - cancel() - } - when (it.status) { - LiveContentResponse.Status.INTERRUPTED -> - while (!playBackQueue.isEmpty()) playBackQueue.poll() - LiveContentResponse.Status.NORMAL -> - if (!it.functionCalls.isNullOrEmpty() && functionCallsHandler != null) { - sendFunctionResponse(it.functionCalls.map(functionCallsHandler).toList()) - } else { - val audioData = it.data?.parts?.get(0)?.asInlineDataPartOrNull()?.inlineData - if (audioData != null) { - playBackQueue.add(audioData) - } - } - } - } - } - } - - private fun playServerResponseAudio() { - CoroutineScope(backgroundDispatcher).launch { - while (isRecording) { - val data = playBackQueue.poll() - if (data == null) { - audioHelper?.start() - continue - } - audioHelper?.stopRecording() - audioHelper?.playAudio(data) - } - } - } + private val startedReceiving = AtomicBoolean(false) /** * Starts an audio conversation with the Gemini server, which can only be stopped using @@ -201,20 +103,27 @@ internal constructor( * @param functionCallHandler A callback function that is invoked whenever the server receives a * function call. */ + @RequiresPermission(RECORD_AUDIO) public suspend fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null ) { - if (isRecording) { - Log.w(TAG, "startAudioConversation called after the recording has already started.") + if (scope.isActive) { + Log.w( + TAG, + "startAudioConversation called after the recording has already started. " + + "Call stopAudioConversation to close the previous connection." 
+ ) return } - isRecording = true - audioHelper = AudioHelper() - audioHelper!!.setupAudioTrack() - fillRecordedAudioQueue() - CoroutineScope(backgroundDispatcher).launch { sendAudioDataToServer() } - fillServerResponseAudioQueue(functionCallHandler) - playServerResponseAudio() + + scope = CoroutineScope(backgroundDispatcher + childJob()) + audioHelper = AudioHelper.Build() + + Log.d(TAG, "Starting audio conversation") + + recordUserAudio() + processModelResponses(functionCallHandler) + listenForModelPlayback() } /** @@ -222,92 +131,37 @@ internal constructor( * [startAudioConversation] */ public fun stopAudioConversation() { - stopReceiving() - isRecording = false - audioHelper?.let { - while (playBackQueue.isNotEmpty()) playBackQueue.poll() - while (audioQueue.isNotEmpty()) audioQueue.poll() - it.release() - } + if(!startedReceiving.get()) return + + scope.cancel() + playBackQueue.clear() + + audioHelper?.release() audioHelper = null - } - /** - * Stops receiving from the model. - * - * If this function is called during an ongoing audio conversation, the model's response will not - * be received, and no audio will be played; the live session object will no longer receive data - * from the server. - * - * To resume receiving data, you must either handle it directly using [receive], or indirectly by - * using [startAudioConversation]. - */ - public fun stopReceiving() { - if (!startedReceiving) { - return - } - receiveChannel.cancel() - receiveChannel = Channel() - startedReceiving = false + startedReceiving.set(false) } /** - * Receives responses from the server for both streaming and standard requests. Call - * [stopReceiving] to stop receiving responses from the server. + * Receives responses from the model for both streaming and standard requests. + * + * Call [close] to stop receiving responses from the model. 
* * @return A [Flow] which will emit [LiveContentResponse] as and when it receives it * * @throws [SessionAlreadyReceivingException] when the session is already receiving. */ + @OptIn(DelicateCoroutinesApi::class) public fun receive(): Flow { - if (startedReceiving) { + if (startedReceiving.getAndSet(true)) { throw SessionAlreadyReceivingException() } + Log.d(TAG, "Starting receive connection.") - val flowReceive = session!!.incoming.receiveAsFlow() - CoroutineScope(backgroundDispatcher).launch { flowReceive.collect { receiveChannel.send(it) } } - return flow { - startedReceiving = true - while (true) { - val message = receiveChannel.receive() - val receivedBytes = (message as Frame.Binary).readBytes() - val receivedJson = receivedBytes.toString(Charsets.UTF_8) - if (receivedJson.contains("interrupted")) { - emit(LiveContentResponse(null, LiveContentResponse.Status.INTERRUPTED, null)) - continue - } - if (receivedJson.contains("turnComplete")) { - emit(LiveContentResponse(null, LiveContentResponse.Status.TURN_COMPLETE, null)) - continue - } - try { - val serverContent = Json.decodeFromString(receivedJson) - val data = serverContent.serverContent.modelTurn.toPublic() - if (data.parts[0].asInlineDataPartOrNull()?.mimeType?.equals("audio/pcm") == true) { - emit(LiveContentResponse(data, LiveContentResponse.Status.NORMAL, null)) - } - if (data.parts[0] is TextPart) { - emit(LiveContentResponse(data, LiveContentResponse.Status.NORMAL, null)) - } - continue - } catch (e: Exception) { - Log.i(TAG, "Failed to decode server content: ${e.message}") - } - try { - val functionContent = Json.decodeFromString(receivedJson) - emit( - LiveContentResponse( - null, - LiveContentResponse.Status.NORMAL, - functionContent.toolCall.functionCalls.map { - FunctionCallPart(it.name, it.args.orEmpty().mapValues { x -> x.value ?: JsonNull }) - } - ) - ) - continue - } catch (e: Exception) { - Log.w(TAG, "Failed to decode function calling: ${e.message}") - } + return flow.transform { frame -> 
+ val response = frameToLiveContentResponse(frame) + response?.let { + emit(it) } } } @@ -321,23 +175,32 @@ internal constructor( public suspend fun sendFunctionResponse(functionList: List) { val jsonString = Json.encodeToString( - ToolResponseSetup(functionList.map { it.toInternalFunctionCall() }).toInternal() + LiveToolResponseSetup(functionList.map { it.toInternalFunctionCall() }).toInternal() ) - session?.send(Frame.Text(jsonString)) + session.send(Frame.Text(jsonString)) } /** * Streams client data to the server. Calling this after [startAudioConversation] will play the * response audio immediately. * - * @param mediaChunks The list of [MediaData] instances representing the media data to be sent. + * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be + * sent. */ + private var sent = false public suspend fun sendMediaStream( - mediaChunks: List, + mediaChunks: List, ) { + Log.d(TAG, "Sending data to server") val jsonString = - Json.encodeToString(MediaStreamingSetup(mediaChunks.map { it.toInternal() }).toInternal()) - session?.send(Frame.Text(jsonString)) + Json.encodeToString( + LiveClientRealtimeInputSetup(mediaChunks.map { it.toInternal() }).toInternal() + ) + Log.d(TAG, "Calling send for: $jsonString") + if(sent) return + sent = true + session.send(Frame.Text(jsonString)) + Log.d(TAG, "Sent") } /** @@ -348,8 +211,8 @@ internal constructor( */ public suspend fun send(content: Content) { val jsonString = - Json.encodeToString(ClientContentSetup(listOf(content.toInternal()), true).toInternal()) - session?.send(Frame.Text(jsonString)) + Json.encodeToString(LiveClientContentSetup(listOf(content.toInternal()), true).toInternal()) + session.send(Frame.Text(jsonString)) } /** @@ -362,13 +225,302 @@ internal constructor( send(Content.Builder().text(text).build()) } + /** Closes the client session. 
*/ + public suspend fun close() { + session.close() + stopAudioConversation() + } + + /** Listen to the user's microphone and send the data to the model. */ + private fun recordUserAudio() { + // Buffer the recording so we can keep recording while data is sent to the server + audioHelper + ?.listenToRecording() + ?.buffer(UNLIMITED) + ?.accumulateUntil(MIN_BUFFER_SIZE) + ?.onEach { sendMediaStream(listOf(InlineDataPart(it, "audio/pcm"))) } + ?.launchIn(scope) + } + + /** + * Processes responses from the model during an audio conversation. + * + * Audio messages are added to [playBackQueue]. + * + * Launched asynchronously on [scope]. + * + * @param functionCallHandler A callback function that is invoked whenever the server receives a + * function call. + */ + private fun processModelResponses( + functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? + ) { + receive() + .transform { + Log.d(TAG, "Processing model response") + if (it.status == LiveContentResponse.Status.INTERRUPTED) { + playBackQueue.clear() + } else { + emit(it) + } + } + .onEach { + if (!it.functionCalls.isNullOrEmpty()) { + if (functionCallHandler != null) { + // It's fine to suspend here since you can't have a function call running concurrently + // with an audio response + sendFunctionResponse(it.functionCalls.map(functionCallHandler).toList()) + } else { + Log.w( + TAG, + "Function calls were present in the response, but a functionCallHandler was not provided." + ) + } + } + + val audioParts = it.data?.parts?.filterIsInstance().orEmpty() + for (part in audioParts) { + Log.d(TAG, "Sending model audio response") + playBackQueue.add(part.inlineData) + } + } + .launchIn(scope) + } + /** - * Closes the client session. + * Listens for playback data from the model and plays the audio. * - * After this is called, the session object becomes unusable. To interact with the server again, - * you must create a new session using [LiveGenerativeModel]. 
+ * Polls [playBackQueue] for data, and calls [AudioHelper.playAudio] when data is received. + * + * Launched asynchronously on [scope]. */ - public suspend fun close() { - session?.close() + private fun listenForModelPlayback() { + scope.launch { + while (isActive) { + val playbackData = playBackQueue.poll() + if (playbackData == null) { + // The model playback queue is complete, so we can continue recording + // TODO(b/408223520): Conditionally resume when param is added + audioHelper?.resumeRecording() + yield() + } else { + /** + * We pause the recording while the model is speaking to avoid interrupting it because of + * no echo cancellation + */ + // TODO(b/408223520): Conditionally pause when param is added + audioHelper?.pauseRecording() + + audioHelper?.playAudio(playbackData) + } + } + } + } + + /** + * Converts a [Frame] from the model to a valid [LiveContentResponse], if possible. + * + * @return The corresponding [LiveContentResponse] or null if it couldn't be converted. + */ + private fun frameToLiveContentResponse(frame: Frame): LiveContentResponse? { + // TODO(b/xxx): Add support for LiveServerToolCallCancellation + // TODO: idk where I started crashing, but it crashes in emulator too. 
gonna need to work backwards + + val jsonMessage = Json.parseToJsonElement(frame.readBytes().toString(Charsets.UTF_8)) + Log.d(TAG, "Decoding frame from the server: $jsonMessage") + + if(jsonMessage !is JsonObject) { + Log.w(TAG, "Server response was not a JsonObject: $jsonMessage") + return null + } + + return when { + "toolCall" in jsonMessage -> { + val functionContent = JSON.decodeFromJsonElement(jsonMessage) + LiveContentResponse( + null, + LiveContentResponse.Status.NORMAL, + functionContent.toolCall.functionCalls.map { + FunctionCallPart(it.name, it.args.orEmpty().mapValues { x -> x.value ?: JsonNull }) + } + ) + } + "serverContent" in jsonMessage -> { + val serverContent = JSON.decodeFromJsonElement(jsonMessage).serverContent + val status = when { + serverContent.turnComplete == true -> LiveContentResponse.Status.TURN_COMPLETE + serverContent.interrupted == true -> LiveContentResponse.Status.INTERRUPTED + else -> LiveContentResponse.Status.NORMAL + } + LiveContentResponse( + serverContent.modelTurn?.toPublic(), + status, + null + ) + } + else -> { + Log.w(TAG, "Failed to decode the server response: $jsonMessage") + null + } + } +// +// val jsonMessage = frame.readBytes().toString(Charsets.UTF_8) +// +// if (jsonMessage.contains("interrupted")) { +// return LiveContentResponse(null, LiveContentResponse.Status.INTERRUPTED, null) +// } +// if (jsonMessage.contains("turnComplete")) { +// // TODO(daymxn): There can still be data present, we should use it +// return LiveContentResponse(null, LiveContentResponse.Status.TURN_COMPLETE, null) +// } +// +// try { +// val serverContent = Json.decodeFromString(jsonMessage) +// val data = serverContent.serverContent.modelTurn?.toPublic() +// +// return LiveContentResponse(data, LiveContentResponse.Status.NORMAL, null) +// } catch (e: Exception) { +// Log.i(TAG, "Failed to decode server content: ${e.message}") +// } +// +// try { +// val functionContent = Json.decodeFromString(jsonMessage) +// return LiveContentResponse( 
+// null, +// LiveContentResponse.Status.NORMAL, +// functionContent.toolCall.functionCalls.map { +// FunctionCallPart(it.name, it.args.orEmpty().mapValues { x -> x.value ?: JsonNull }) +// } +// ) +// } catch (e: Exception) { +// Log.w(TAG, "Failed to decode function calling: ${e.message}") +// } +// +// return null + } + + /** + * Incremental update of the current conversation delivered from the client. + * + * Effectively, a message from the client to the model. + */ + internal class LiveClientContentSetup( + val turns: List, + val turnComplete: Boolean + ) { + @Serializable + internal class Internal(val clientContent: LiveClientContent) { + @Serializable + internal data class LiveClientContent( + val turns: List, + val turnComplete: Boolean + ) + } + + fun toInternal() = Internal(Internal.LiveClientContent(turns, turnComplete)) + } + + /** + * Incremental server update generated by the model in response to client messages. + * + * Effectively, a message from the model to the client. + */ + internal class LiveServerContentSetup( + val modelTurn: Content.Internal?, + val turnComplete: Boolean?, + val interrupted: Boolean? + ) { + @Serializable + internal class Internal(val serverContent: LiveServerContent) { + @Serializable + internal data class LiveServerContent( + val modelTurn: Content.Internal?, + val turnComplete: Boolean?, + val interrupted: Boolean? + ) + } + + fun toInternal() = Internal(Internal.LiveServerContent(modelTurn, turnComplete, interrupted)) + } + + /** + * Request for the client to execute the provided function calls and return the responses with the + * matched `id`s. 
+ */ + internal data class LiveServerToolCall( + val functionCalls: List + ) { + @Serializable + internal class Internal(val toolCall: LiveServerToolCall) { + @Serializable + internal data class LiveServerToolCall( + val functionCalls: List + ) + } + + fun toInternal(): Internal { + return Internal(Internal.LiveServerToolCall(functionCalls)) + } + } + + /** Client generated responses to a [LiveServerToolCall]. */ + internal class LiveToolResponseSetup( + val functionResponses: List + ) { + @Serializable + internal data class Internal(val toolResponse: LiveToolResponse) { + @Serializable + internal data class LiveToolResponse( + val functionResponses: List + ) + } + + fun toInternal() = Internal(Internal.LiveToolResponse(functionResponses)) + } + + /** + * User input that is sent to the model in real time. + * + * End of turn is derived from user activity (eg; end of speech). + */ + internal class LiveClientRealtimeInputSetup(val mediaChunks: List) { + @Serializable + internal class Internal(val realtimeInput: LiveClientRealtimeInput) { + @Serializable + internal data class LiveClientRealtimeInput(val mediaChunks: List) + } + fun toInternal() = Internal(Internal.LiveClientRealtimeInput(mediaChunks)) + } + + private companion object { + val TAG = LiveSession::class.java.simpleName + val MIN_BUFFER_SIZE = + AudioTrack.getMinBufferSize( + 24000, + AudioFormat.CHANNEL_OUT_MONO, + AudioFormat.ENCODING_PCM_16BIT + ) } } + +@OptIn(DelicateCoroutinesApi::class) +internal fun Channel.duplicate(): Pair, Flow> { + val newChannel = Channel() + val newFlow = flow { + while (!newChannel.isClosedForSend) { + if (isClosedForReceive) { + newChannel.close() + continue + } + val data = receive() + // should we check for close here too? 
+ emit(data) + } + } + + return newChannel to newFlow +} + +internal suspend inline fun childJob() = Job(currentCoroutineContext()[Job] ?: Job()) + +internal val CancelledCoroutineScope = CoroutineScope(EmptyCoroutineContext).apply { cancel() } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt index 21d3c0edc6c..6f8c6452ed7 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt @@ -70,15 +70,24 @@ public class InlineDataPart(public val inlineData: ByteArray, public val mimeTyp * * @param name the name of the function to call * @param args the function parameters and values as a [Map] + * @param id unique id of the function call. If populated, the returned [FunctionResponsePart] + * should have a matching `id` field. */ -public class FunctionCallPart(public val name: String, public val args: Map) : - Part { +public class FunctionCallPart( + public val name: String, + public val args: Map, + public val id: String? = null +) : Part { @Serializable internal data class Internal(val functionCall: FunctionCall) : InternalPart { @Serializable - internal data class FunctionCall(val name: String, val args: Map? = null) + internal data class FunctionCall( + val name: String, + val args: Map? = null, + val id: String? 
= null + ) } } @@ -87,13 +96,23 @@ public class FunctionCallPart(public val name: String, public val args: Map( + """ + { + "serverContent": { + "modelTurn": { + "role": "server", + "parts": [{ + "inlineData": { + "mimeType": "audio/pcm", + "data": ${Base64.encodeToString(bytes, BASE_64_FLAGS)} + } + }] + } + } + } + """ + .trimIndent() + ) + + routing { + webSocket(path = testUrl) { + send(Frame.Text("setupComplete")) + // send(Frame.Binary(true, + // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), + // testResponse).toByteArray())) + // send(Frame.Binary(true, + // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), + // testResponse).toByteArray())) + // send(Frame.Binary(true, + // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), + // testResponse).toByteArray())) + // send(Frame.Binary(true, + // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), + // testResponse).toByteArray())) + // send(Frame.Binary(true, + // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), + // testResponse).toByteArray())) + // send(Frame.Binary(true, + // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), + // testResponse).toByteArray())) + send( + Frame.Binary( + true, + JSON.encodeToString( + LiveSession.LiveServerContentSetup.Internal.serializer(), + testResponse + ) + .toByteArray() + ) + ) + + for (frame in incoming) { + + } + } + } + + val channel = ByteChannel(autoFlush = true) + val mockFirebaseApp = Mockito.mock() + Mockito.`when`(mockFirebaseApp.isDataCollectionDefaultEnabled).thenReturn(false) + + val apiController = + Mockito.spy( + APIController( + "super_cool_test_key", + "gemini-pro", + RequestOptions(), + client.engine, + TEST_CLIENT_ID, + mockFirebaseApp, + TEST_VERSION, + TEST_APP_ID, + null, + ) + ) + + Mockito.doReturn(testUrl).`when`(apiController).getBidiEndpoint(Mockito.anyString()) + // 
Mockito.`when`(apiController.getBidiEndpoint(Mockito.anyString())).thenReturn(server.url("/").toString()) + + // val scope = CoroutineScope(firebaseExecutors.blocking) + val model = + LiveGenerativeModel( + "cool-model-name", + location = "us-central1", + backgroundDispatcher = Dispatchers.IO, + controller = apiController + ) + + // setupComplete + withTimeout(5_000) { + // channel.send("setupComplete".toByteArray()) + val connection = model.connect() + val value = AtomicInteger(0) + val currDelay = 100L + val scope = CoroutineScope(Dispatchers.IO) + scope.launch { + println("Launching 1") + connection.receive().collect { + println(""" + Got message: + Status: ${it.status} + Data: ${it.data != null} + Parts: ${it.data?.parts?.size} + InlineData Size: ${it.data?.parts?.first()?.asInlineDataPartOrNull()?.inlineData?.size} + + """.trimIndent()) + println("1 => ${it.data?.parts?.first()?.asInlineDataPartOrNull()?.inlineData?.size}") + value.incrementAndGet() + } + println("Done with 1") + } + + delay(currDelay) + println("Sending") + delay(currDelay) + connection.send("") + delay(currDelay) + delay(currDelay) + //connection.stopReceiving() + connection.close() + delay(currDelay) + value.get().shouldBeEqual(1) + } + + // // setupComplete + // withTimeout(10_000) { + // //channel.send("setupComplete".toByteArray()) + // val connection = model.connect() + // connection.startAudioConversation() + // //delay(5_000) + // runCurrent() + // connection.stopAudioConversation() + // connection.close() + // } + } +} diff --git a/firebase-vertexai/src/test/java/com/google/firebase/vertexai/util/tests.kt b/firebase-vertexai/src/test/java/com/google/firebase/vertexai/util/tests.kt index a683c1d5032..6af4339dbb4 100644 --- a/firebase-vertexai/src/test/java/com/google/firebase/vertexai/util/tests.kt +++ b/firebase-vertexai/src/test/java/com/google/firebase/vertexai/util/tests.kt @@ -38,9 +38,9 @@ import java.io.File import kotlinx.coroutines.launch import org.mockito.Mockito -private 
val TEST_CLIENT_ID = "firebase-vertexai-android/test" -private val TEST_APP_ID = "1:android:12345" -private val TEST_VERSION = 1 +internal val TEST_CLIENT_ID = "firebase-vertexai-android/test" +internal val TEST_APP_ID = "1:android:12345" +internal val TEST_VERSION = 1 /** String separator used in SSE communication to signal the end of a message. */ internal const val SSE_SEPARATOR = "\r\n\r\n" @@ -98,6 +98,7 @@ internal typealias CommonTest = suspend CommonTestScope.() -> Unit * @param block The test contents themselves, with the [CommonTestScope] implicitly provided * @see CommonTestScope */ +@OptIn(PublicPreviewAPI::class) internal fun commonTest( status: HttpStatusCode = HttpStatusCode.OK, requestOptions: RequestOptions = RequestOptions(), diff --git a/firebase-vertexai/src/testUtil/java/com/google/firebase/vertexai/JavaCompileTests.java b/firebase-vertexai/src/testUtil/java/com/google/firebase/vertexai/JavaCompileTests.java index 066e672ffb8..02499602da4 100644 --- a/firebase-vertexai/src/testUtil/java/com/google/firebase/vertexai/JavaCompileTests.java +++ b/firebase-vertexai/src/testUtil/java/com/google/firebase/vertexai/JavaCompileTests.java @@ -75,7 +75,7 @@ private void testFutures(GenerativeModelFutures futures) throws Exception { .addFileData("fakeuri", "image/png") .addInlineData(new byte[] {}, "text/json") .addImage(Bitmap.createBitmap(0, 0, Bitmap.Config.HARDWARE)) - .addPart(new FunctionCallPart("fakeFunction", Map.of("fakeArg", JsonNull.INSTANCE))) + .addPart(new FunctionCallPart("fakeFunction", Map.of("fakeArg", JsonNull.INSTANCE), null)) .build(); // TODO b/406558430 Content.Builder.setParts and Content.Builder.setRole return void Executor executor = FirebaseExecutors.directExecutor(); From 96544967ce9138f45da2b4b9e0bc2c82f07057c0 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 13:18:39 -0500 Subject: [PATCH 02/40] Fix audiohelper bug --- .../kotlin/com/google/firebase/vertexai/type/AudioHelper.kt | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt index c7205042dba..2729db2426b 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt @@ -54,13 +54,13 @@ internal class AudioHelper( } fun pauseRecording() { - if (released || recorder.state == AudioRecord.RECORDSTATE_STOPPED) return + if (released || recorder.recordingState == AudioRecord.RECORDSTATE_STOPPED) return recorder.stop() } fun resumeRecording() { - if (released || recorder.state == AudioRecord.RECORDSTATE_RECORDING) return + if (released || recorder.recordingState == AudioRecord.RECORDSTATE_RECORDING) return recorder.startRecording() } From c9065099b184c97ecb5d1de16b5a895442f21d01 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 13:19:07 -0500 Subject: [PATCH 03/40] Rename class --- .../com/google/firebase/vertexai/LiveGenerativeModel.kt | 4 ++-- .../type/{LiveClientSetup.kt => LiveClientSetupMessage.kt} | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/{LiveClientSetup.kt => LiveClientSetupMessage.kt} (97%) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt index c83b41bfa47..70a5a18af2b 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt @@ -23,7 +23,7 @@ import com.google.firebase.auth.internal.InternalAuthProvider import com.google.firebase.vertexai.common.APIController import com.google.firebase.vertexai.common.AppCheckHeaderProvider import 
com.google.firebase.vertexai.type.Content -import com.google.firebase.vertexai.type.LiveClientSetup +import com.google.firebase.vertexai.type.LiveClientSetupMessage import com.google.firebase.vertexai.type.LiveGenerationConfig import com.google.firebase.vertexai.type.LiveSession import com.google.firebase.vertexai.type.PublicPreviewAPI @@ -93,7 +93,7 @@ internal constructor( @OptIn(ExperimentalSerializationApi::class) public suspend fun connect(): LiveSession { val clientMessage = - LiveClientSetup( + LiveClientSetupMessage( modelName, config?.toInternal(), tools?.map { it.toInternal() }, diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetup.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetupMessage.kt similarity index 97% rename from firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetup.kt rename to firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetupMessage.kt index 1c626f5964d..6b751961ed2 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetup.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveClientSetupMessage.kt @@ -26,7 +26,7 @@ import kotlinx.serialization.Serializable */ @OptIn(ExperimentalSerializationApi::class) @PublicPreviewAPI -internal class LiveClientSetup( +internal class LiveClientSetupMessage( val model: String, // Some config options are supported in generateContent but not in bidi and vise versa; so bidi // needs its own config class From 909a730e08909f835b53eb975bce3285a814296d Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 13:20:04 -0500 Subject: [PATCH 04/40] Remove mediadata --- .../firebase/vertexai/type/LiveSession.kt | 74 +------------------ .../firebase/vertexai/type/MediaData.kt | 40 ---------- 2 files changed, 3 insertions(+), 111 deletions(-) delete mode 100644 
firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/MediaData.kt diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index bdbf1f48f50..77721deadd7 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -119,8 +119,6 @@ internal constructor( scope = CoroutineScope(backgroundDispatcher + childJob()) audioHelper = AudioHelper.Build() - Log.d(TAG, "Starting audio conversation") - recordUserAudio() processModelResponses(functionCallHandler) listenForModelPlayback() @@ -151,12 +149,10 @@ internal constructor( * * @throws [SessionAlreadyReceivingException] when the session is already receiving. */ - @OptIn(DelicateCoroutinesApi::class) public fun receive(): Flow { if (startedReceiving.getAndSet(true)) { throw SessionAlreadyReceivingException() } - Log.d(TAG, "Starting receive connection.") return flow.transform { frame -> val response = frameToLiveContentResponse(frame) @@ -187,20 +183,14 @@ internal constructor( * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be * sent. 
*/ - private var sent = false public suspend fun sendMediaStream( mediaChunks: List, ) { - Log.d(TAG, "Sending data to server") val jsonString = Json.encodeToString( - LiveClientRealtimeInputSetup(mediaChunks.map { it.toInternal() }).toInternal() + LiveClientRealtimeInputSetup(mediaChunks.map { (it.toInternal() as InlineDataPart.Internal).inlineData }).toInternal() ) - Log.d(TAG, "Calling send for: $jsonString") - if(sent) return - sent = true session.send(Frame.Text(jsonString)) - Log.d(TAG, "Sent") } /** @@ -257,7 +247,6 @@ internal constructor( ) { receive() .transform { - Log.d(TAG, "Processing model response") if (it.status == LiveContentResponse.Status.INTERRUPTED) { playBackQueue.clear() } else { @@ -280,7 +269,6 @@ internal constructor( val audioParts = it.data?.parts?.filterIsInstance().orEmpty() for (part in audioParts) { - Log.d(TAG, "Sending model audio response") playBackQueue.add(part.inlineData) } } @@ -323,11 +311,7 @@ internal constructor( * @return The corresponding [LiveContentResponse] or null if it couldn't be converted. */ private fun frameToLiveContentResponse(frame: Frame): LiveContentResponse? { - // TODO(b/xxx): Add support for LiveServerToolCallCancellation - // TODO: idk where I started crashing, but it crashes in emulator too. 
gonna need to work backwards - val jsonMessage = Json.parseToJsonElement(frame.readBytes().toString(Charsets.UTF_8)) - Log.d(TAG, "Decoding frame from the server: $jsonMessage") if(jsonMessage !is JsonObject) { Log.w(TAG, "Server response was not a JsonObject: $jsonMessage") @@ -363,40 +347,6 @@ internal constructor( null } } -// -// val jsonMessage = frame.readBytes().toString(Charsets.UTF_8) -// -// if (jsonMessage.contains("interrupted")) { -// return LiveContentResponse(null, LiveContentResponse.Status.INTERRUPTED, null) -// } -// if (jsonMessage.contains("turnComplete")) { -// // TODO(daymxn): There can still be data present, we should use it -// return LiveContentResponse(null, LiveContentResponse.Status.TURN_COMPLETE, null) -// } -// -// try { -// val serverContent = Json.decodeFromString(jsonMessage) -// val data = serverContent.serverContent.modelTurn?.toPublic() -// -// return LiveContentResponse(data, LiveContentResponse.Status.NORMAL, null) -// } catch (e: Exception) { -// Log.i(TAG, "Failed to decode server content: ${e.message}") -// } -// -// try { -// val functionContent = Json.decodeFromString(jsonMessage) -// return LiveContentResponse( -// null, -// LiveContentResponse.Status.NORMAL, -// functionContent.toolCall.functionCalls.map { -// FunctionCallPart(it.name, it.args.orEmpty().mapValues { x -> x.value ?: JsonNull }) -// } -// ) -// } catch (e: Exception) { -// Log.w(TAG, "Failed to decode function calling: ${e.message}") -// } -// -// return null } /** @@ -483,11 +433,11 @@ internal constructor( * * End of turn is derived from user activity (eg; end of speech). 
*/ - internal class LiveClientRealtimeInputSetup(val mediaChunks: List) { + internal class LiveClientRealtimeInputSetup(val mediaChunks: List) { @Serializable internal class Internal(val realtimeInput: LiveClientRealtimeInput) { @Serializable - internal data class LiveClientRealtimeInput(val mediaChunks: List) + internal data class LiveClientRealtimeInput(val mediaChunks: List) } fun toInternal() = Internal(Internal.LiveClientRealtimeInput(mediaChunks)) } @@ -503,24 +453,6 @@ internal constructor( } } -@OptIn(DelicateCoroutinesApi::class) -internal fun Channel.duplicate(): Pair, Flow> { - val newChannel = Channel() - val newFlow = flow { - while (!newChannel.isClosedForSend) { - if (isClosedForReceive) { - newChannel.close() - continue - } - val data = receive() - // should we check for close here too? - emit(data) - } - } - - return newChannel to newFlow -} - internal suspend inline fun childJob() = Job(currentCoroutineContext()[Job] ?: Job()) internal val CancelledCoroutineScope = CoroutineScope(EmptyCoroutineContext).apply { cancel() } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/MediaData.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/MediaData.kt deleted file mode 100644 index 7e58c9cf43c..00000000000 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/MediaData.kt +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.google.firebase.vertexai.type - -import android.util.Base64 -import kotlinx.serialization.Serializable - -/** - * Represents the media data to be sent to the server - * - * @param data Byte array representing the data to be sent. - * @param mimeType an IANA standard MIME type. For supported MIME type values see the - * [Firebase documentation](https://firebase.google.com/docs/vertex-ai/input-file-requirements). - */ -@PublicPreviewAPI -public class MediaData(public val data: ByteArray, public val mimeType: String) { - @Serializable - internal class Internal( - val data: String, - val mimeType: String, - ) - - internal fun toInternal(): Internal { - return Internal(Base64.encodeToString(data, BASE_64_FLAGS), mimeType) - } -} From d3bf81235581b8b6e6d030844db04cd5f762ccbf Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 13:26:24 -0500 Subject: [PATCH 05/40] Remove id changes, but leave a TODO --- .../com/google/firebase/vertexai/type/Part.kt | 15 +++++---------- .../firebase/vertexai/JavaCompileTests.java | 2 +- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt index 6f8c6452ed7..540d6d757b7 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt @@ -70,13 +70,11 @@ public class InlineDataPart(public val inlineData: ByteArray, public val mimeTyp * * @param name the name of the function to call * @param args the function parameters and values as a [Map] - * @param id unique id of the function call. If populated, the returned [FunctionResponsePart] - * should have a matching `id` field. 
*/ +// TODO(b/410040441): Support id property public class FunctionCallPart( public val name: String, public val args: Map, - public val id: String? = null ) : Part { @Serializable @@ -85,8 +83,7 @@ public class FunctionCallPart( @Serializable internal data class FunctionCall( val name: String, - val args: Map? = null, - val id: String? = null + val args: Map? = null ) } } @@ -96,12 +93,11 @@ public class FunctionCallPart( * * @param name the name of the called function * @param response the response produced by the function as a [JSONObject] - * @param id unique id matching the [FunctionCallPart], if one was present. */ +// TODO(b/410040441): Support id property public class FunctionResponsePart( public val name: String, - public val response: JsonObject, - public val id: String? = null + public val response: JsonObject ) : Part { @Serializable @@ -110,8 +106,7 @@ public class FunctionResponsePart( @Serializable internal data class FunctionResponse( val name: String, - val response: JsonObject, - val id: String? 
= null + val response: JsonObject ) } diff --git a/firebase-vertexai/src/testUtil/java/com/google/firebase/vertexai/JavaCompileTests.java b/firebase-vertexai/src/testUtil/java/com/google/firebase/vertexai/JavaCompileTests.java index 02499602da4..066e672ffb8 100644 --- a/firebase-vertexai/src/testUtil/java/com/google/firebase/vertexai/JavaCompileTests.java +++ b/firebase-vertexai/src/testUtil/java/com/google/firebase/vertexai/JavaCompileTests.java @@ -75,7 +75,7 @@ private void testFutures(GenerativeModelFutures futures) throws Exception { .addFileData("fakeuri", "image/png") .addInlineData(new byte[] {}, "text/json") .addImage(Bitmap.createBitmap(0, 0, Bitmap.Config.HARDWARE)) - .addPart(new FunctionCallPart("fakeFunction", Map.of("fakeArg", JsonNull.INSTANCE), null)) + .addPart(new FunctionCallPart("fakeFunction", Map.of("fakeArg", JsonNull.INSTANCE))) .build(); // TODO b/406558430 Content.Builder.setParts and Content.Builder.setRole return void Executor executor = FirebaseExecutors.directExecutor(); From 19147e3f71f9c1bae56622833b2762749e4d7539 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 13:27:01 -0500 Subject: [PATCH 06/40] Remove testing artifacts --- .../firebase-vertexai.gradle.kts | 9 - .../firebase/vertexai/LiveModelTests.kt | 183 ------------------ 2 files changed, 192 deletions(-) delete mode 100644 firebase-vertexai/src/test/java/com/google/firebase/vertexai/LiveModelTests.kt diff --git a/firebase-vertexai/firebase-vertexai.gradle.kts b/firebase-vertexai/firebase-vertexai.gradle.kts index c1c5a537956..f728e905cbb 100644 --- a/firebase-vertexai/firebase-vertexai.gradle.kts +++ b/firebase-vertexai/firebase-vertexai.gradle.kts @@ -121,15 +121,6 @@ dependencies { testImplementation(libs.robolectric) testImplementation(libs.truth) testImplementation(libs.mockito.core) - testImplementation("com.squareup.okhttp3:mockwebserver:4.12.0") - testImplementation(project(":integ-testing")) - testImplementation("io.ktor:ktor-server-websockets:2.3.2") { 
- // only needed for UI tests. Junit this could be ignored. - exclude("org.eclipse.jetty") - } - testImplementation("io.ktor:ktor-server-test-host:2.3.2") { - exclude("org.eclipse.jetty") - } androidTestImplementation(libs.androidx.espresso.core) androidTestImplementation(libs.androidx.test.junit) diff --git a/firebase-vertexai/src/test/java/com/google/firebase/vertexai/LiveModelTests.kt b/firebase-vertexai/src/test/java/com/google/firebase/vertexai/LiveModelTests.kt deleted file mode 100644 index c22d655df5e..00000000000 --- a/firebase-vertexai/src/test/java/com/google/firebase/vertexai/LiveModelTests.kt +++ /dev/null @@ -1,183 +0,0 @@ -package com.google.firebase.vertexai - -import android.util.Base64 -import com.google.firebase.FirebaseApp -import com.google.firebase.vertexai.common.APIController -import com.google.firebase.vertexai.common.JSON -import com.google.firebase.vertexai.type.BASE_64_FLAGS -import com.google.firebase.vertexai.type.LiveSession -import com.google.firebase.vertexai.type.PublicPreviewAPI -import com.google.firebase.vertexai.type.RequestOptions -import com.google.firebase.vertexai.type.asInlineDataPartOrNull -import com.google.firebase.vertexai.util.TEST_APP_ID -import com.google.firebase.vertexai.util.TEST_CLIENT_ID -import com.google.firebase.vertexai.util.TEST_VERSION -import io.kotest.matchers.equals.shouldBeEqual -import io.ktor.client.plugins.websocket.WebSockets as ClientWebsockets -import io.ktor.server.testing.testApplication -import io.ktor.server.websocket.WebSockets as ServerWebsockets -import io.ktor.server.websocket.webSocket -import io.ktor.utils.io.ByteChannel -import io.ktor.websocket.Frame -import java.util.concurrent.atomic.AtomicInteger -import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.delay -import kotlinx.coroutines.launch -import kotlinx.coroutines.newFixedThreadPoolContext -import kotlinx.coroutines.withTimeout -import org.junit.Test -import 
org.junit.runner.RunWith -import org.mockito.Mockito -import org.robolectric.RobolectricTestRunner - -@OptIn(PublicPreviewAPI::class) -@RunWith(RobolectricTestRunner::class) -internal class LiveModelTests { - @Test - fun `(generateContent) generates a proper response`() = testApplication { - val client = createClient { install(ClientWebsockets) } - - val testUrl = "/ws" - - install(ServerWebsockets) - - val bytes = "Hello World!".toByteArray() - - val testResponse = - JSON.decodeFromString( - """ - { - "serverContent": { - "modelTurn": { - "role": "server", - "parts": [{ - "inlineData": { - "mimeType": "audio/pcm", - "data": ${Base64.encodeToString(bytes, BASE_64_FLAGS)} - } - }] - } - } - } - """ - .trimIndent() - ) - - routing { - webSocket(path = testUrl) { - send(Frame.Text("setupComplete")) - // send(Frame.Binary(true, - // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), - // testResponse).toByteArray())) - // send(Frame.Binary(true, - // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), - // testResponse).toByteArray())) - // send(Frame.Binary(true, - // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), - // testResponse).toByteArray())) - // send(Frame.Binary(true, - // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), - // testResponse).toByteArray())) - // send(Frame.Binary(true, - // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), - // testResponse).toByteArray())) - // send(Frame.Binary(true, - // JSON.encodeToString(LiveSession.LiveServerContentSetup.Internal.serializer(), - // testResponse).toByteArray())) - send( - Frame.Binary( - true, - JSON.encodeToString( - LiveSession.LiveServerContentSetup.Internal.serializer(), - testResponse - ) - .toByteArray() - ) - ) - - for (frame in incoming) { - - } - } - } - - val channel = ByteChannel(autoFlush = true) - val mockFirebaseApp = Mockito.mock() - 
Mockito.`when`(mockFirebaseApp.isDataCollectionDefaultEnabled).thenReturn(false) - - val apiController = - Mockito.spy( - APIController( - "super_cool_test_key", - "gemini-pro", - RequestOptions(), - client.engine, - TEST_CLIENT_ID, - mockFirebaseApp, - TEST_VERSION, - TEST_APP_ID, - null, - ) - ) - - Mockito.doReturn(testUrl).`when`(apiController).getBidiEndpoint(Mockito.anyString()) - // Mockito.`when`(apiController.getBidiEndpoint(Mockito.anyString())).thenReturn(server.url("/").toString()) - - // val scope = CoroutineScope(firebaseExecutors.blocking) - val model = - LiveGenerativeModel( - "cool-model-name", - location = "us-central1", - backgroundDispatcher = Dispatchers.IO, - controller = apiController - ) - - // setupComplete - withTimeout(5_000) { - // channel.send("setupComplete".toByteArray()) - val connection = model.connect() - val value = AtomicInteger(0) - val currDelay = 100L - val scope = CoroutineScope(Dispatchers.IO) - scope.launch { - println("Launching 1") - connection.receive().collect { - println(""" - Got message: - Status: ${it.status} - Data: ${it.data != null} - Parts: ${it.data?.parts?.size} - InlineData Size: ${it.data?.parts?.first()?.asInlineDataPartOrNull()?.inlineData?.size} - - """.trimIndent()) - println("1 => ${it.data?.parts?.first()?.asInlineDataPartOrNull()?.inlineData?.size}") - value.incrementAndGet() - } - println("Done with 1") - } - - delay(currDelay) - println("Sending") - delay(currDelay) - connection.send("") - delay(currDelay) - delay(currDelay) - //connection.stopReceiving() - connection.close() - delay(currDelay) - value.get().shouldBeEqual(1) - } - - // // setupComplete - // withTimeout(10_000) { - // //channel.send("setupComplete".toByteArray()) - // val connection = model.connect() - // connection.startAudioConversation() - // //delay(5_000) - // runCurrent() - // connection.stopAudioConversation() - // connection.close() - // } - } -} From 61f6f4cb7b0d5c256c48b6318d57c4377d03b3b6 Mon Sep 17 00:00:00 2001 From: 
Daymon Date: Fri, 11 Apr 2025 13:28:43 -0500 Subject: [PATCH 07/40] Update APIController.kt --- .../kotlin/com/google/firebase/vertexai/common/APIController.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/APIController.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/APIController.kt index d5c538a1ef8..f82d4866cf6 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/APIController.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/APIController.kt @@ -160,7 +160,7 @@ internal constructor( throw FirebaseCommonAIException.from(e) } - internal fun getBidiEndpoint(location: String): String = + private fun getBidiEndpoint(location: String): String = "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/$location?key=$key" suspend fun getWebSocketSession(location: String): ClientWebSocketSession = From 6df1ef0c0cb3aeda3be91c1f2ebbd01f1f44d4af Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 13:45:50 -0500 Subject: [PATCH 08/40] Formatting --- .../vertexai/java/LiveSessionFutures.kt | 3 +- .../firebase/vertexai/type/LiveSession.kt | 46 +++++++++---------- .../com/google/firebase/vertexai/type/Part.kt | 16 ++----- 3 files changed, 28 insertions(+), 37 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt index 77291b864c7..d9ce36187c5 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt @@ -71,7 +71,8 @@ public abstract class LiveSessionFutures internal constructor() { /** * Streams client data to the server. 
* - * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be sent. + * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be + * sent. */ public abstract fun sendMediaStream(mediaChunks: List): ListenableFuture diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index 77721deadd7..a1d66a8b612 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -32,13 +32,10 @@ import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext import kotlin.coroutines.EmptyCoroutineContext import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.DelicateCoroutinesApi import kotlinx.coroutines.Job import kotlinx.coroutines.cancel -import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED import kotlinx.coroutines.currentCoroutineContext -import kotlinx.coroutines.delay import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.buffer import kotlinx.coroutines.flow.flow @@ -117,7 +114,7 @@ internal constructor( } scope = CoroutineScope(backgroundDispatcher + childJob()) - audioHelper = AudioHelper.Build() + audioHelper = AudioHelper.build() recordUserAudio() processModelResponses(functionCallHandler) @@ -129,7 +126,7 @@ internal constructor( * [startAudioConversation] */ public fun stopAudioConversation() { - if(!startedReceiving.get()) return + if (!startedReceiving.get()) return scope.cancel() playBackQueue.clear() @@ -156,9 +153,7 @@ internal constructor( return flow.transform { frame -> val response = frameToLiveContentResponse(frame) - response?.let { - emit(it) - } + response?.let { emit(it) } } } @@ -188,7 +183,10 @@ internal constructor( ) { val jsonString 
= Json.encodeToString( - LiveClientRealtimeInputSetup(mediaChunks.map { (it.toInternal() as InlineDataPart.Internal).inlineData }).toInternal() + LiveClientRealtimeInputSetup( + mediaChunks.map { (it.toInternal() as InlineDataPart.Internal).inlineData } + ) + .toInternal() ) session.send(Frame.Text(jsonString)) } @@ -313,7 +311,7 @@ internal constructor( private fun frameToLiveContentResponse(frame: Frame): LiveContentResponse? { val jsonMessage = Json.parseToJsonElement(frame.readBytes().toString(Charsets.UTF_8)) - if(jsonMessage !is JsonObject) { + if (jsonMessage !is JsonObject) { Log.w(TAG, "Server response was not a JsonObject: $jsonMessage") return null } @@ -330,17 +328,15 @@ internal constructor( ) } "serverContent" in jsonMessage -> { - val serverContent = JSON.decodeFromJsonElement(jsonMessage).serverContent - val status = when { - serverContent.turnComplete == true -> LiveContentResponse.Status.TURN_COMPLETE - serverContent.interrupted == true -> LiveContentResponse.Status.INTERRUPTED - else -> LiveContentResponse.Status.NORMAL - } - LiveContentResponse( - serverContent.modelTurn?.toPublic(), - status, - null - ) + val serverContent = + JSON.decodeFromJsonElement(jsonMessage).serverContent + val status = + when { + serverContent.turnComplete == true -> LiveContentResponse.Status.TURN_COMPLETE + serverContent.interrupted == true -> LiveContentResponse.Status.INTERRUPTED + else -> LiveContentResponse.Status.NORMAL + } + LiveContentResponse(serverContent.modelTurn?.toPublic(), status, null) } else -> { Log.w(TAG, "Failed to decode the server response: $jsonMessage") @@ -433,11 +429,15 @@ internal constructor( * * End of turn is derived from user activity (eg; end of speech). 
*/ - internal class LiveClientRealtimeInputSetup(val mediaChunks: List) { + internal class LiveClientRealtimeInputSetup( + val mediaChunks: List + ) { @Serializable internal class Internal(val realtimeInput: LiveClientRealtimeInput) { @Serializable - internal data class LiveClientRealtimeInput(val mediaChunks: List) + internal data class LiveClientRealtimeInput( + val mediaChunks: List + ) } fun toInternal() = Internal(Internal.LiveClientRealtimeInput(mediaChunks)) } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt index 540d6d757b7..b2538a8d6a0 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Part.kt @@ -81,10 +81,7 @@ public class FunctionCallPart( internal data class Internal(val functionCall: FunctionCall) : InternalPart { @Serializable - internal data class FunctionCall( - val name: String, - val args: Map? = null - ) + internal data class FunctionCall(val name: String, val args: Map? 
= null) } } @@ -95,19 +92,12 @@ public class FunctionCallPart( * @param response the response produced by the function as a [JSONObject] */ // TODO(b/410040441): Support id property -public class FunctionResponsePart( - public val name: String, - public val response: JsonObject -) : Part { +public class FunctionResponsePart(public val name: String, public val response: JsonObject) : Part { @Serializable internal data class Internal(val functionResponse: FunctionResponse) : InternalPart { - @Serializable - internal data class FunctionResponse( - val name: String, - val response: JsonObject - ) + @Serializable internal data class FunctionResponse(val name: String, val response: JsonObject) } internal fun toInternalFunctionCall(): Internal.FunctionResponse { From b3e5bd9af967204d365fe8792daff9a2bc74ea87 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 13:46:02 -0500 Subject: [PATCH 09/40] Add docs to AudioHelper --- .../firebase/vertexai/common/util/android.kt | 36 +++++++++ .../firebase/vertexai/type/AudioHelper.kt | 76 ++++++++++++------- 2 files changed, 86 insertions(+), 26 deletions(-) create mode 100644 firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt new file mode 100644 index 00000000000..bc30aba9490 --- /dev/null +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt @@ -0,0 +1,36 @@ +package com.google.firebase.vertexai.common.util + +import android.media.AudioRecord +import kotlinx.coroutines.flow.flow +import kotlinx.coroutines.yield + +/** + * The minimum buffer size for this instance. + * + * The same as calling [AudioRecord.getMinBufferSize], except the params are pre-populated. 
+ */ +internal val AudioRecord.minBufferSize: Int + get() = AudioRecord.getMinBufferSize(sampleRate, channelConfiguration, audioFormat) + +/** + * Reads from this [AudioRecord] and returns the data in a flow. + * + * Will pause when this instance temporarily stops recording. + */ +internal fun AudioRecord.readAsFlow() = flow { + val buffer = ByteArray(minBufferSize) + + while (true) { + if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { + yield() + continue + } + + val bytesRead = read(buffer, 0, buffer.size) + if (bytesRead > 0) { + emit(buffer.copyOf(bytesRead)) + } else { + yield() + } + } +} diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt index 2729db2426b..4b07e35a9f6 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt @@ -25,20 +25,32 @@ import android.media.AudioTrack import android.media.MediaRecorder import android.media.audiofx.AcousticEchoCanceler import androidx.annotation.RequiresPermission -import kotlinx.coroutines.delay +import com.google.firebase.vertexai.common.util.readAsFlow import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.emptyFlow -import kotlinx.coroutines.flow.flow +/** + * Helper class for recording audio and playing back a separate audio track at the same time. + * + * @see AudioHelper.build + */ @PublicPreviewAPI internal class AudioHelper( - // Record for recording the user's mic + /** Record for recording the System microphone. */ private val recorder: AudioRecord, - // Track for playing back what the model says + /** Track for playing back what the model says. */ private val playbackTrack: AudioTrack, ) { private var released: Boolean = false + /** + * Release the system resources on the recorder and playback track. 
+ * + * Once an [AudioHelper] has been "released", it can _not_ be used again. + * + * This method can safely be called multiple times, as it won't do anything if this instance has + * already been released. + */ fun release() { if (released) return released = true @@ -47,24 +59,48 @@ internal class AudioHelper( playbackTrack.release() } + /** + * Play the provided audio data on the playback track. + * + * Does nothing if this [AudioHelper] has been [released][release]. + */ fun playAudio(data: ByteArray) { if (released) return playbackTrack.write(data, 0, data.size) } + /** + * Pause the recording of the microphone, if it's recording. + * + * Does nothing if this [AudioHelper] has been [released][release]. + * + * @see resumeRecording + */ fun pauseRecording() { if (released || recorder.recordingState == AudioRecord.RECORDSTATE_STOPPED) return recorder.stop() } + /** + * Resumes the recording of the microphone, if it's not already running. + * + * Does nothing if this [AudioHelper] has been [released][release]. + * + * @see pauseRecording + */ fun resumeRecording() { if (released || recorder.recordingState == AudioRecord.RECORDSTATE_RECORDING) return recorder.startRecording() } + /** + * Start perpetually recording the system microphone, and return the bytes read in a flow. + * + * Returns an empty flow if this [AudioHelper] has been [released][release]. + */ fun listenToRecording(): Flow { if (released) return emptyFlow() @@ -74,8 +110,17 @@ internal class AudioHelper( } companion object { + /** + * Creates an instance of [AudioHelper] with the track and record initialized. + * + * A separate build method is necessary so that we can properly propagate the required manifest + * permission, and throw exceptions when needed. + * + * It also makes it easier to read, since the long initialization is separate from the + * constructor. 
+ */ @RequiresPermission(Manifest.permission.RECORD_AUDIO) - fun Build(): AudioHelper { + fun build(): AudioHelper { val playbackTrack = AudioTrack( AudioAttributes.Builder().setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION).build(), @@ -128,24 +173,3 @@ internal class AudioHelper( } } } - -internal val AudioRecord.minBufferSize: Int - get() = AudioRecord.getMinBufferSize(sampleRate, channelConfiguration, audioFormat) - -internal fun AudioRecord.readAsFlow() = flow { - val buffer = ByteArray(minBufferSize) - - while (true) { - if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { - delay(1) - continue - } - - val bytesRead = read(buffer, 0, buffer.size) - if (bytesRead > 0) { - emit(buffer.copyOf(bytesRead)) - } else { - delay(1) - } - } -} From 28171b4d88af1aefe3afa7c3e4b8015d4b4e287c Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 13:52:42 -0500 Subject: [PATCH 10/40] Update AudioHelper.kt --- .../main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt index 4b07e35a9f6..6e14c23bc44 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt @@ -33,6 +33,7 @@ import kotlinx.coroutines.flow.emptyFlow * Helper class for recording audio and playing back a separate audio track at the same time. 
* * @see AudioHelper.build + * @see LiveSession.startAudioConversation */ @PublicPreviewAPI internal class AudioHelper( From 5df85dcfda1afb2c55e9bfa93377ed9f11d0b3b8 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 14:03:09 -0500 Subject: [PATCH 11/40] Document accumulateUntil --- .../firebase/vertexai/common/util/kotlin.kt | 36 +++++++++++++++++++ .../firebase/vertexai/type/LiveSession.kt | 25 ++----------- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt index bf806528781..ae7d4a34ccf 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt @@ -17,6 +17,8 @@ package com.google.firebase.vertexai.common.util import java.lang.reflect.Field +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.flow /** * Removes the last character from the [StringBuilder]. @@ -39,3 +41,37 @@ internal fun StringBuilder.removeLast(): StringBuilder = * ``` */ internal inline fun Field.getAnnotation() = getAnnotation(T::class.java) + +/** + * Collects bytes from this flow and doesn't emit them back until [minSize] is reached. + * + * For example: + * ``` + * val byteArr = flowOf(byteArrayOf(1), byteArrayOf(2, 3, 4), byteArrayOf(5, 6, 7, 8)) + * val expectedResult = listOf(byteArrayOf(1, 2, 3, 4), byteArrayOf( 5, 6, 7, 8)) + * + * byteArr.accumulateUntil(4).toList() shouldContainExactly expectedResult + * ``` + * + * @param minSize The minimum about of bytes the array should have before being sent down-stream + * @param emitLeftOvers If the flow completes and there are bytes left over that don't meet the + * [minSize], send them anyways. 
+ */ +internal fun Flow.accumulateUntil( + minSize: Int, + emitLeftOvers: Boolean = false +): Flow = flow { + val buffer = mutableListOf() + + collect { + buffer.addAll(it.asSequence()) + if (buffer.size >= minSize) { + emit(buffer.toByteArray()) + buffer.clear() + } + } + + if (emitLeftOvers && buffer.isNotEmpty()) { + emit(buffer.toByteArray()) + } +} diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index a1d66a8b612..5451a19826f 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -23,6 +23,7 @@ import android.util.Log import androidx.annotation.RequiresPermission import com.google.firebase.annotations.concurrent.Blocking import com.google.firebase.vertexai.common.JSON +import com.google.firebase.vertexai.common.util.accumulateUntil import io.ktor.client.plugins.websocket.ClientWebSocketSession import io.ktor.websocket.Frame import io.ktor.websocket.close @@ -42,6 +43,7 @@ import kotlinx.coroutines.flow.flow import kotlinx.coroutines.flow.launchIn import kotlinx.coroutines.flow.onEach import kotlinx.coroutines.flow.receiveAsFlow +import kotlinx.coroutines.flow.toList import kotlinx.coroutines.flow.transform import kotlinx.coroutines.isActive import kotlinx.coroutines.launch @@ -54,29 +56,6 @@ import kotlinx.serialization.json.JsonNull import kotlinx.serialization.json.JsonObject import kotlinx.serialization.json.decodeFromJsonElement -// TODO: maybe dont use a bytearray, maybe use a channel or list to avoid accidental overflow -internal fun Flow.accumulateUntil( - minSize: Int, - emitLeftOvers: Boolean = false -): Flow = flow { - var offset = 0 - val audioBuffer = ByteArray(minSize * 2) - - collect { - it.copyInto(audioBuffer, offset) - offset += it.size - if (offset >= minSize) { - 
emit(audioBuffer.clone()) - audioBuffer.fill(0) - offset = 0 - } - } - // Emit any leftover bytes (optional) - if (emitLeftOvers && offset > 0) { - emit(audioBuffer.clone()) - } -} - /** Represents a live WebSocket session capable of streaming content to and from the server. */ @PublicPreviewAPI @OptIn(ExperimentalSerializationApi::class) From 1bed4eb3cc2e3b94ac57b045ca0cf9024420accf Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 15:45:13 -0500 Subject: [PATCH 12/40] Add back stopReceiving --- .../firebase/vertexai/type/LiveSession.kt | 50 ++++++++++++++++--- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index 5451a19826f..f23ab7a6cd1 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -105,15 +105,13 @@ internal constructor( * [startAudioConversation] */ public fun stopAudioConversation() { - if (!startedReceiving.get()) return + if (!startedReceiving.getAndSet(false)) return scope.cancel() playBackQueue.clear() audioHelper?.release() audioHelper = null - - startedReceiving.set(false) } /** @@ -130,10 +128,48 @@ internal constructor( throw SessionAlreadyReceivingException() } - return flow.transform { frame -> - val response = frameToLiveContentResponse(frame) - response?.let { emit(it) } - } + // TODO(b/410059569): Remove when fixed + return flow { + while (true) { + val response = session.incoming.tryReceive() + if (response.isClosed || !startedReceiving.get()) break + + val frame = response.getOrNull() + frame?.let { frameToLiveContentResponse(it) }?.let { emit(it) } + + yield() + } + } + .onCompletion { stopAudioConversation() } + + // TODO(b/410059569): Add back when fixed + // return session.incoming.receiveAsFlow().transform { 
frame -> + // val response = frameToLiveContentResponse(frame) + // response?.let { emit(it) } + // }.onCompletion { + // stopAudioConversation() + // } + } + + /** + * Stops receiving from the model. + * + * If this function is called during an ongoing audio conversation, the model's response will not + * be received, and no audio will be played; the live session object will no longer receive data + * from the server. + * + * To resume receiving data, you must either handle it directly using [receive], or indirectly by + * using [startAudioConversation]. + */ + // TODO(b/410059569): Remove when fixed + public fun stopReceiving() { + if (!startedReceiving.getAndSet(false)) return + + scope.cancel() + playBackQueue.clear() + + audioHelper?.release() + audioHelper = null } /** From 89ec5a9f749e07b1f07229e1ab42df49cdf9ee3d Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 15:45:26 -0500 Subject: [PATCH 13/40] Add additional documentation --- .../firebase/vertexai/type/LiveSession.kt | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index f23ab7a6cd1..f8050085f97 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -41,8 +41,8 @@ import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.buffer import kotlinx.coroutines.flow.flow import kotlinx.coroutines.flow.launchIn +import kotlinx.coroutines.flow.onCompletion import kotlinx.coroutines.flow.onEach -import kotlinx.coroutines.flow.receiveAsFlow import kotlinx.coroutines.flow.toList import kotlinx.coroutines.flow.transform import kotlinx.coroutines.isActive @@ -63,13 +63,27 @@ public class LiveSession internal constructor( private val session: ClientWebSocketSession, 
@Blocking private val backgroundDispatcher: CoroutineContext, - // TODO: might need to be AtomicRef private var audioHelper: AudioHelper? = null ) { - private val flow = session.incoming.receiveAsFlow() + /** + * Coroutine scope that we batch data on for [startAudioConversation]. + * + * Makes it easy to stop all the work with [stopAudioConversation] by just cancelling the scope. + */ private var scope = CancelledCoroutineScope + /** + * Playback audio data sent from the model. + * + * Effectively, this is what the model is saying. + */ private val playBackQueue = ConcurrentLinkedQueue() + + /** + * Toggled whenever [receive] and [stopReceiving] are called. + * + * Used to ensure only one flow is consuming the playback at once. + */ private val startedReceiving = AtomicBoolean(false) /** From b3b9678afea198065e25198adff5e3819a85d270 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 15:53:03 -0500 Subject: [PATCH 14/40] Cleanup javadocs --- .../firebase/vertexai/type/LiveSession.kt | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index f8050085f97..78596d4290f 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -87,10 +87,10 @@ internal constructor( private val startedReceiving = AtomicBoolean(false) /** - * Starts an audio conversation with the Gemini server, which can only be stopped using - * [stopAudioConversation]. + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. 
* - * @param functionCallHandler A callback function that is invoked whenever the server receives a + * @param functionCallHandler A callback function that is invoked whenever the model receives a * function call. */ @RequiresPermission(RECORD_AUDIO) @@ -115,8 +115,11 @@ internal constructor( } /** - * Stops the audio conversation with the Gemini Server. This needs to be called only after calling - * [startAudioConversation] + * Stops the audio conversation with the model. + * + * This only needs to be called after a previous call to [startAudioConversation]. + * + * If there is no audio conversation currently active, this function does nothing. */ public fun stopAudioConversation() { if (!startedReceiving.getAndSet(false)) return @@ -133,9 +136,10 @@ internal constructor( * * Call [close] to stop receiving responses from the model. * - * @return A [Flow] which will emit [LiveContentResponse] as and when it receives it + * @return A [Flow] which will emit [LiveContentResponse] from the model. * * @throws [SessionAlreadyReceivingException] when the session is already receiving. + * @see stopReceiving */ public fun receive(): Flow { if (startedReceiving.getAndSet(true)) { @@ -187,7 +191,7 @@ internal constructor( } /** - * Sends the function calling responses to the server. + * Sends function calling responses to the model. * * @param functionList The list of [FunctionResponsePart] instances indicating the function * response from the client. @@ -201,8 +205,9 @@ internal constructor( } /** - * Streams client data to the server. Calling this after [startAudioConversation] will play the - * response audio immediately. + * Streams client data to the model. + * + * Calling this after [startAudioConversation] will play the response audio immediately. * * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be * sent. @@ -221,10 +226,11 @@ internal constructor( } /** - * Sends data to the server. 
Calling this after [startAudioConversation] will play the response - * audio immediately. + * Sends data to the model. + * + * Calling this after [startAudioConversation] will play the response audio immediately. * - * @param content Client [Content] to be sent to the server. + * @param content Client [Content] to be sent to the model. */ public suspend fun send(content: Content) { val jsonString = @@ -233,16 +239,22 @@ internal constructor( } /** - * Sends text to the server. Calling this after [startAudioConversation] will play the response - * audio immediately. + * Sends text to the model. * - * @param text Text to be sent to the server. + * Calling this after [startAudioConversation] will play the response audio immediately. + * + * @param text Text to be sent to the model. */ public suspend fun send(text: String) { send(Content.Builder().text(text).build()) } - /** Closes the client session. */ + /** + * Closes the client session. + * + * Once a [LiveSession] is closed, it can not be reopened; you'll need to start a new + * [LiveSession]. 
+ */ public suspend fun close() { session.close() stopAudioConversation() From fbae7cb313e7bdf19ce3e5e54285276ce08f5a3f Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 15:56:59 -0500 Subject: [PATCH 15/40] Use blocking instead of background dispatcher --- .../com/google/firebase/vertexai/FirebaseVertexAI.kt | 6 +++--- .../vertexai/FirebaseVertexAIMultiResourceComponent.kt | 6 +++--- .../firebase/vertexai/FirebaseVertexAIRegistrar.kt | 10 +++++----- .../google/firebase/vertexai/LiveGenerativeModel.kt | 10 +++++----- .../com/google/firebase/vertexai/type/LiveSession.kt | 5 ++--- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAI.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAI.kt index 7c90e78c402..c36ec25d078 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAI.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAI.kt @@ -19,7 +19,7 @@ package com.google.firebase.vertexai import android.util.Log import com.google.firebase.Firebase import com.google.firebase.FirebaseApp -import com.google.firebase.annotations.concurrent.Background +import com.google.firebase.annotations.concurrent.Blocking import com.google.firebase.app import com.google.firebase.appcheck.interop.InteropAppCheckTokenProvider import com.google.firebase.auth.internal.InternalAuthProvider @@ -41,7 +41,7 @@ import kotlin.coroutines.CoroutineContext public class FirebaseVertexAI internal constructor( private val firebaseApp: FirebaseApp, - @Background private val backgroundDispatcher: CoroutineContext, + @Blocking private val blockingDispatcher: CoroutineContext, private val location: String, private val appCheckProvider: Provider, private val internalAuthProvider: Provider, @@ -133,7 +133,7 @@ internal constructor( 
"projects/${firebaseApp.options.projectId}/locations/${location}/publishers/google/models/${modelName}", firebaseApp.options.apiKey, firebaseApp, - backgroundDispatcher, + blockingDispatcher, generationConfig, tools, systemInstruction, diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAIMultiResourceComponent.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAIMultiResourceComponent.kt index 1b9cb7a4909..526e1f87be8 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAIMultiResourceComponent.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAIMultiResourceComponent.kt @@ -18,7 +18,7 @@ package com.google.firebase.vertexai import androidx.annotation.GuardedBy import com.google.firebase.FirebaseApp -import com.google.firebase.annotations.concurrent.Background +import com.google.firebase.annotations.concurrent.Blocking import com.google.firebase.appcheck.interop.InteropAppCheckTokenProvider import com.google.firebase.auth.internal.InternalAuthProvider import com.google.firebase.inject.Provider @@ -31,7 +31,7 @@ import kotlin.coroutines.CoroutineContext */ internal class FirebaseVertexAIMultiResourceComponent( private val app: FirebaseApp, - @Background val backgroundDispatcher: CoroutineContext, + @Blocking val blockingDispatcher: CoroutineContext, private val appCheckProvider: Provider, private val internalAuthProvider: Provider, ) { @@ -43,7 +43,7 @@ internal class FirebaseVertexAIMultiResourceComponent( instances[location] ?: FirebaseVertexAI( app, - backgroundDispatcher, + blockingDispatcher, location, appCheckProvider, internalAuthProvider diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAIRegistrar.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAIRegistrar.kt index ff5409567a9..13cb73cdb71 100644 --- 
a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAIRegistrar.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/FirebaseVertexAIRegistrar.kt @@ -18,7 +18,7 @@ package com.google.firebase.vertexai import androidx.annotation.Keep import com.google.firebase.FirebaseApp -import com.google.firebase.annotations.concurrent.Background +import com.google.firebase.annotations.concurrent.Blocking import com.google.firebase.appcheck.interop.InteropAppCheckTokenProvider import com.google.firebase.auth.internal.InternalAuthProvider import com.google.firebase.components.Component @@ -41,13 +41,13 @@ internal class FirebaseVertexAIRegistrar : ComponentRegistrar { Component.builder(FirebaseVertexAIMultiResourceComponent::class.java) .name(LIBRARY_NAME) .add(Dependency.required(firebaseApp)) - .add(Dependency.required(backgroundDispatcher)) + .add(Dependency.required(blockingDispatcher)) .add(Dependency.optionalProvider(appCheckInterop)) .add(Dependency.optionalProvider(internalAuthProvider)) .factory { container -> FirebaseVertexAIMultiResourceComponent( container[firebaseApp], - container.get(backgroundDispatcher), + container.get(blockingDispatcher), container.getProvider(appCheckInterop), container.getProvider(internalAuthProvider) ) @@ -62,7 +62,7 @@ internal class FirebaseVertexAIRegistrar : ComponentRegistrar { private val firebaseApp = unqualified(FirebaseApp::class.java) private val appCheckInterop = unqualified(InteropAppCheckTokenProvider::class.java) private val internalAuthProvider = unqualified(InternalAuthProvider::class.java) - private val backgroundDispatcher = - Qualified.qualified(Background::class.java, CoroutineDispatcher::class.java) + private val blockingDispatcher = + Qualified.qualified(Blocking::class.java, CoroutineDispatcher::class.java) } } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt 
b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt index 70a5a18af2b..60715edb866 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt @@ -17,7 +17,7 @@ package com.google.firebase.vertexai import com.google.firebase.FirebaseApp -import com.google.firebase.annotations.concurrent.Background +import com.google.firebase.annotations.concurrent.Blocking import com.google.firebase.appcheck.interop.InteropAppCheckTokenProvider import com.google.firebase.auth.internal.InternalAuthProvider import com.google.firebase.vertexai.common.APIController @@ -47,7 +47,7 @@ import kotlinx.serialization.json.Json public class LiveGenerativeModel internal constructor( private val modelName: String, - @Background private val backgroundDispatcher: CoroutineContext, + @Blocking private val blockingDispatcher: CoroutineContext, private val config: LiveGenerationConfig? = null, private val tools: List? = null, private val systemInstruction: Content? = null, @@ -58,7 +58,7 @@ internal constructor( modelName: String, apiKey: String, firebaseApp: FirebaseApp, - backgroundDispatcher: CoroutineContext, + blockingDispatcher: CoroutineContext, config: LiveGenerationConfig? = null, tools: List? = null, systemInstruction: Content? = null, @@ -68,7 +68,7 @@ internal constructor( internalAuthProvider: InternalAuthProvider? = null, ) : this( modelName, - backgroundDispatcher, + blockingDispatcher, config, tools, systemInstruction, @@ -107,7 +107,7 @@ internal constructor( val receivedJson = webSession.incoming.receive().readBytes().toString(Charsets.UTF_8) // TODO: Try to decode the json instead of string matching. 
return if (receivedJson.contains("setupComplete")) { - LiveSession(session = webSession, backgroundDispatcher = backgroundDispatcher) + LiveSession(session = webSession, blockingDispatcher = blockingDispatcher) } else { webSession.close() throw ServiceConnectionHandshakeFailedException("Unable to connect to the server") diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index 78596d4290f..b043ff0aedf 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -43,7 +43,6 @@ import kotlinx.coroutines.flow.flow import kotlinx.coroutines.flow.launchIn import kotlinx.coroutines.flow.onCompletion import kotlinx.coroutines.flow.onEach -import kotlinx.coroutines.flow.toList import kotlinx.coroutines.flow.transform import kotlinx.coroutines.isActive import kotlinx.coroutines.launch @@ -62,7 +61,7 @@ import kotlinx.serialization.json.decodeFromJsonElement public class LiveSession internal constructor( private val session: ClientWebSocketSession, - @Blocking private val backgroundDispatcher: CoroutineContext, + @Blocking private val blockingDispatcher: CoroutineContext, private var audioHelper: AudioHelper? 
= null ) { /** @@ -106,7 +105,7 @@ internal constructor( return } - scope = CoroutineScope(backgroundDispatcher + childJob()) + scope = CoroutineScope(blockingDispatcher + childJob()) audioHelper = AudioHelper.build() recordUserAudio() From 3d390a1f4d835d0b1584e1c00e2ab787b0bc2d22 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 16:24:44 -0500 Subject: [PATCH 16/40] Emit empty buffer if no data is read --- .../com/google/firebase/vertexai/common/util/android.kt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt index bc30aba9490..6c80f96243d 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt @@ -15,10 +15,11 @@ internal val AudioRecord.minBufferSize: Int /** * Reads from this [AudioRecord] and returns the data in a flow. * - * Will pause when this instance temporarily stops recording. + * Will emit a zeroed out buffer when this instance is not recording. 
*/ internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) + val emptyBuffer = ByteArray(minBufferSize) while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { @@ -30,7 +31,7 @@ internal fun AudioRecord.readAsFlow() = flow { if (bytesRead > 0) { emit(buffer.copyOf(bytesRead)) } else { - yield() + emit(emptyBuffer) } } } From 51d1fec426337cbd4da845ac252d900675a0ee94 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 16:29:28 -0500 Subject: [PATCH 17/40] Add documentation for util methods --- .../firebase/vertexai/common/util/kotlin.kt | 23 +++++++++++++++++++ .../firebase/vertexai/type/LiveSession.kt | 9 ++------ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt index ae7d4a34ccf..e459a5cfe12 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt @@ -17,6 +17,11 @@ package com.google.firebase.vertexai.common.util import java.lang.reflect.Field +import kotlin.coroutines.EmptyCoroutineContext +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Job +import kotlinx.coroutines.cancel +import kotlinx.coroutines.currentCoroutineContext import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.flow @@ -75,3 +80,21 @@ internal fun Flow.accumulateUntil( emit(buffer.toByteArray()) } } + +/** + * Create a [Job] that is a child of the [currentCoroutineContext], if any. + * + * This is useful when you want a coroutine scope to be canceled when its parent scope is canceled, + * and you don't have full control over the parent scope, but you don't want the cancellation of the + * child to impact the parent. 
+ * + * If the parent coroutine context does not have a job, an empty one will be created. + */ +internal suspend inline fun childJob() = Job(currentCoroutineContext()[Job] ?: Job()) + +/** + * A constant value pointing to a cancelled [CoroutineScope]. + * + * Useful when you want to initialize a mutable [CoroutineScope] in a canceled state. + */ +internal val CancelledCoroutineScope = CoroutineScope(EmptyCoroutineContext).apply { cancel() } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index b043ff0aedf..c5fb9f20711 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -23,7 +23,9 @@ import android.util.Log import androidx.annotation.RequiresPermission import com.google.firebase.annotations.concurrent.Blocking import com.google.firebase.vertexai.common.JSON +import com.google.firebase.vertexai.common.util.CancelledCoroutineScope import com.google.firebase.vertexai.common.util.accumulateUntil +import com.google.firebase.vertexai.common.util.childJob import io.ktor.client.plugins.websocket.ClientWebSocketSession import io.ktor.websocket.Frame import io.ktor.websocket.close @@ -31,12 +33,9 @@ import io.ktor.websocket.readBytes import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicBoolean import kotlin.coroutines.CoroutineContext -import kotlin.coroutines.EmptyCoroutineContext import kotlinx.coroutines.CoroutineScope -import kotlinx.coroutines.Job import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED -import kotlinx.coroutines.currentCoroutineContext import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.buffer import kotlinx.coroutines.flow.flow @@ -492,7 +491,3 @@ internal constructor( ) } } - -internal suspend 
inline fun childJob() = Job(currentCoroutineContext()[Job] ?: Job()) - -internal val CancelledCoroutineScope = CoroutineScope(EmptyCoroutineContext).apply { cancel() } From 7de0eb7c6d5873e752f287dbb6445a8638649c67 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 16:32:56 -0500 Subject: [PATCH 18/40] Decode setupComplete to json --- .../com/google/firebase/vertexai/LiveGenerativeModel.kt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt index 60715edb866..5d4adbfc189 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt @@ -22,6 +22,7 @@ import com.google.firebase.appcheck.interop.InteropAppCheckTokenProvider import com.google.firebase.auth.internal.InternalAuthProvider import com.google.firebase.vertexai.common.APIController import com.google.firebase.vertexai.common.AppCheckHeaderProvider +import com.google.firebase.vertexai.common.JSON import com.google.firebase.vertexai.type.Content import com.google.firebase.vertexai.type.LiveClientSetupMessage import com.google.firebase.vertexai.type.LiveGenerationConfig @@ -38,6 +39,7 @@ import kotlinx.coroutines.channels.ClosedReceiveChannelException import kotlinx.serialization.ExperimentalSerializationApi import kotlinx.serialization.encodeToString import kotlinx.serialization.json.Json +import kotlinx.serialization.json.JsonObject /** * Represents a multimodal model (like Gemini) capable of real-time content generation based on @@ -104,9 +106,10 @@ internal constructor( try { val webSession = controller.getWebSocketSession(location) webSession.send(Frame.Text(data)) - val receivedJson = webSession.incoming.receive().readBytes().toString(Charsets.UTF_8) - // TODO: Try to decode the 
json instead of string matching. - return if (receivedJson.contains("setupComplete")) { + val receivedJsonStr = webSession.incoming.receive().readBytes().toString(Charsets.UTF_8) + val receivedJson = JSON.parseToJsonElement(receivedJsonStr) + + return if(receivedJson is JsonObject && "setupComplete" in receivedJson) { LiveSession(session = webSession, blockingDispatcher = blockingDispatcher) } else { webSession.close() From b137cd9bd7015bc916ab56d2ffb784182ae39566 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 16:37:36 -0500 Subject: [PATCH 19/40] Update javadocs --- .../kotlin/com/google/firebase/vertexai/type/LiveSession.kt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index c5fb9f20711..afdbe36c562 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -176,6 +176,8 @@ internal constructor( * * To resume receiving data, you must either handle it directly using [receive], or indirectly by * using [startAudioConversation]. + * + * @see close */ // TODO(b/410059569): Remove when fixed public fun stopReceiving() { @@ -224,7 +226,7 @@ internal constructor( } /** - * Sends data to the model. + * Sends [data][Content] to the model. * * Calling this after [startAudioConversation] will play the response audio immediately. * @@ -252,6 +254,8 @@ internal constructor( * * Once a [LiveSession] is closed, it can not be reopened; you'll need to start a new * [LiveSession]. 
+ * + * @see stopReceiving */ public suspend fun close() { session.close() From 3b408dc4498a1312476a181af80f6a8d4324a7a0 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 16:37:51 -0500 Subject: [PATCH 20/40] Update java javadocs to match kotlin --- .../vertexai/java/LiveSessionFutures.kt | 70 ++++++++++++++----- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt index d9ce36187c5..f270bd87485 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt @@ -28,10 +28,11 @@ import com.google.firebase.vertexai.type.LiveContentResponse import com.google.firebase.vertexai.type.LiveSession import com.google.firebase.vertexai.type.PublicPreviewAPI import com.google.firebase.vertexai.type.SessionAlreadyReceivingException +import io.ktor.websocket.close +import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.reactive.asPublisher import org.reactivestreams.Publisher -// TODO(daymxn): Make sure the javadocs here match the kotlin ones /** * Wrapper class providing Java compatible methods for [LiveSession]. * @@ -41,25 +42,42 @@ import org.reactivestreams.Publisher public abstract class LiveSessionFutures internal constructor() { /** - * Starts an audio conversation with the Gemini server, which can only be stopped using - * [stopAudioConversation]. + * Starts an audio conversation with the model, which can only be stopped using + * [stopAudioConversation] or [close]. * - * @param functionCallHandler A callback function to map function calls from the server to their - * response parts. + * @param functionCallHandler A callback function that is invoked whenever the model receives a + * function call. 
*/ public abstract fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? ): ListenableFuture /** - * Stops the audio conversation with the Gemini Server. + * Stops the audio conversation with the model. * - * @see [startAudioConversation] + * This only needs to be called after a previous call to [startAudioConversation]. + * + * If there is no audio conversation currently active, this function does nothing. */ public abstract fun stopAudioConversation(): ListenableFuture /** - * Sends the function response from the client to the server. + * Stops receiving from the model. + * + * If this function is called during an ongoing audio conversation, the model's response will not + * be received, and no audio will be played; the live session object will no longer receive data + * from the server. + * + * To resume receiving data, you must either handle it directly using [receive], or indirectly by + * using [startAudioConversation]. + * + * @see close + */ + // TODO(b/410059569): Remove when fixed + public abstract fun stopReceiving() + + /** + * Sends function calling responses to the model. * * @param functionList The list of [FunctionResponsePart] instances indicating the function * response from the client. @@ -69,7 +87,9 @@ public abstract class LiveSessionFutures internal constructor() { ): ListenableFuture /** - * Streams client data to the server. + * Streams client data to the model. + * + * Calling this after [startAudioConversation] will play the response audio immediately. * * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be * sent. @@ -77,28 +97,42 @@ public abstract class LiveSessionFutures internal constructor() { public abstract fun sendMediaStream(mediaChunks: List): ListenableFuture /** - * Sends [data][Content] to the server. + * Sends [data][Content] to the model. * - * @param content Client [Content] to be sent to the server. 
+ * Calling this after [startAudioConversation] will play the response audio immediately. + * + * @param content Client [Content] to be sent to the model. */ public abstract fun send(content: Content): ListenableFuture /** - * Sends text to the server + * Sends text to the model. + * + * Calling this after [startAudioConversation] will play the response audio immediately. * - * @param text Text to be sent to the server. + * @param text Text to be sent to the model. */ public abstract fun send(text: String): ListenableFuture - /** Closes the client session. */ + /** + * Closes the client session. + * + * Once a [LiveSession] is closed, it can not be reopened; you'll need to start a new + * [LiveSession]. + * + * @see stopReceiving + */ public abstract fun close(): ListenableFuture /** - * Receives responses from the server for both streaming and standard requests. + * Receives responses from the model for both streaming and standard requests. * - * @return A [Publisher] which will emit [LiveContentResponse] as and when it receives it. + * Call [close] to stop receiving responses from the model. * - * @throws [SessionAlreadyReceivingException] When the session is already receiving. + * @return A [Publisher] which will emit [LiveContentResponse] from the model. + * + * @throws [SessionAlreadyReceivingException] when the session is already receiving. 
+ * @see stopReceiving */ public abstract fun receive(): Publisher @@ -127,6 +161,8 @@ public abstract class LiveSessionFutures internal constructor() { override fun stopAudioConversation() = SuspendToFutureAdapter.launchFuture { session.stopAudioConversation() } + + override fun stopReceiving() = session.stopReceiving() } public companion object { From cc8969a5dc5ca18c83df883edaf440fd2a7a2d1e Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 16:56:01 -0500 Subject: [PATCH 21/40] Update CHANGELOG.md --- firebase-vertexai/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md index 4293874a293..0d935e1fdcc 100644 --- a/firebase-vertexai/CHANGELOG.md +++ b/firebase-vertexai/CHANGELOG.md @@ -2,6 +2,8 @@ * [feature] Added support for `HarmBlockThreshold.OFF`. See the [model documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-filters#how_to_configure_content_filters){: .external} for more information. +* [changed] **Breaking Change**: Removed `MediaData` in favor of `InlineDataPart`. (#6870) +* [fixed] Improved thread usage when using a `LiveGenerativeModel`. 
(#6870) # 16.3.0 * [feature] Emits a warning when attempting to use an incompatible model with From 8c99ced6e62019d016cc9365cdf7245b81afba36 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 16:56:29 -0500 Subject: [PATCH 22/40] fmt --- .../kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt | 2 +- .../com/google/firebase/vertexai/java/LiveSessionFutures.kt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt index 5d4adbfc189..d546e09cdd2 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/LiveGenerativeModel.kt @@ -109,7 +109,7 @@ internal constructor( val receivedJsonStr = webSession.incoming.receive().readBytes().toString(Charsets.UTF_8) val receivedJson = JSON.parseToJsonElement(receivedJsonStr) - return if(receivedJson is JsonObject && "setupComplete" in receivedJson) { + return if (receivedJson is JsonObject && "setupComplete" in receivedJson) { LiveSession(session = webSession, blockingDispatcher = blockingDispatcher) } else { webSession.close() diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt index f270bd87485..5fd208219c0 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt @@ -29,7 +29,6 @@ import com.google.firebase.vertexai.type.LiveSession import com.google.firebase.vertexai.type.PublicPreviewAPI import com.google.firebase.vertexai.type.SessionAlreadyReceivingException import io.ktor.websocket.close -import kotlinx.coroutines.flow.Flow import 
kotlinx.coroutines.reactive.asPublisher import org.reactivestreams.Publisher From c9dda5cbaee42d6f8a2e03582fdb2caa08a180eb Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 16:58:28 -0500 Subject: [PATCH 23/40] Add missing copyright --- .../firebase/vertexai/common/util/android.kt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt index 6c80f96243d..982104c7a0c 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt @@ -1,3 +1,19 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package com.google.firebase.vertexai.common.util import android.media.AudioRecord From a0cb879f4a8fdc18a363f6ad79080d956fb7de1a Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 11 Apr 2025 17:02:07 -0500 Subject: [PATCH 24/40] Update api.txt --- firebase-vertexai/api.txt | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/firebase-vertexai/api.txt b/firebase-vertexai/api.txt index 137d49f490c..f6f5465f49e 100644 --- a/firebase-vertexai/api.txt +++ b/firebase-vertexai/api.txt @@ -131,7 +131,7 @@ package com.google.firebase.vertexai.java { method public abstract com.google.common.util.concurrent.ListenableFuture send(com.google.firebase.vertexai.type.Content content); method public abstract com.google.common.util.concurrent.ListenableFuture send(String text); method public abstract com.google.common.util.concurrent.ListenableFuture sendFunctionResponse(java.util.List functionList); - method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); + method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); method public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); @@ -627,20 +627,12 @@ package com.google.firebase.vertexai.type { method public suspend Object? send(com.google.firebase.vertexai.type.Content content, kotlin.coroutines.Continuation); method public suspend Object? send(String text, kotlin.coroutines.Continuation); method public suspend Object? sendFunctionResponse(java.util.List functionList, kotlin.coroutines.Continuation); - method public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); - method public suspend Object? 
startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); + method public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); + method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); } - @com.google.firebase.vertexai.type.PublicPreviewAPI public final class MediaData { - ctor public MediaData(byte[] data, String mimeType); - method public byte[] getData(); - method public String getMimeType(); - property public final byte[] data; - property public final String mimeType; - } - public final class ModalityTokenCount { method public operator com.google.firebase.vertexai.type.ContentModality component1(); method public operator int component2(); From b79f27f50e335479294e87fff768a6c5711d942e Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 14 Apr 2025 11:13:36 -0500 Subject: [PATCH 25/40] Add back MediaData --- firebase-vertexai/CHANGELOG.md | 2 +- firebase-vertexai/api.txt | 12 +++++- .../vertexai/java/LiveSessionFutures.kt | 9 ++--- .../firebase/vertexai/type/LiveSession.kt | 20 +++------- .../firebase/vertexai/type/MediaData.kt | 40 +++++++++++++++++++ 5 files changed, 61 insertions(+), 22 deletions(-) create mode 100644 firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/MediaData.kt diff --git a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md index 0d935e1fdcc..9ffeb7e9463 100644 --- a/firebase-vertexai/CHANGELOG.md +++ b/firebase-vertexai/CHANGELOG.md @@ -2,9 +2,9 @@ * [feature] Added support for `HarmBlockThreshold.OFF`. See the [model documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-filters#how_to_configure_content_filters){: .external} for more information. 
-* [changed] **Breaking Change**: Removed `MediaData` in favor of `InlineDataPart`. (#6870) * [fixed] Improved thread usage when using a `LiveGenerativeModel`. (#6870) + # 16.3.0 * [feature] Emits a warning when attempting to use an incompatible model with `GenerativeModel` or `ImagenModel`. diff --git a/firebase-vertexai/api.txt b/firebase-vertexai/api.txt index f6f5465f49e..8c29809b116 100644 --- a/firebase-vertexai/api.txt +++ b/firebase-vertexai/api.txt @@ -131,7 +131,7 @@ package com.google.firebase.vertexai.java { method public abstract com.google.common.util.concurrent.ListenableFuture send(com.google.firebase.vertexai.type.Content content); method public abstract com.google.common.util.concurrent.ListenableFuture send(String text); method public abstract com.google.common.util.concurrent.ListenableFuture sendFunctionResponse(java.util.List functionList); - method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); + method public abstract com.google.common.util.concurrent.ListenableFuture sendMediaStream(java.util.List mediaChunks); method public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler); method public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation(); method public abstract void stopReceiving(); @@ -627,12 +627,20 @@ package com.google.firebase.vertexai.type { method public suspend Object? send(com.google.firebase.vertexai.type.Content content, kotlin.coroutines.Continuation); method public suspend Object? send(String text, kotlin.coroutines.Continuation); method public suspend Object? sendFunctionResponse(java.util.List functionList, kotlin.coroutines.Continuation); - method public suspend Object? sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); + method public suspend Object? 
sendMediaStream(java.util.List mediaChunks, kotlin.coroutines.Continuation); method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation); method public void stopAudioConversation(); method public void stopReceiving(); } + @com.google.firebase.vertexai.type.PublicPreviewAPI public final class MediaData { + ctor public MediaData(byte[] data, String mimeType); + method public byte[] getData(); + method public String getMimeType(); + property public final byte[] data; + property public final String mimeType; + } + public final class ModalityTokenCount { method public operator com.google.firebase.vertexai.type.ContentModality component1(); method public operator int component2(); diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt index 5fd208219c0..a7ffd5c0b92 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt @@ -23,9 +23,9 @@ import com.google.common.util.concurrent.ListenableFuture import com.google.firebase.vertexai.type.Content import com.google.firebase.vertexai.type.FunctionCallPart import com.google.firebase.vertexai.type.FunctionResponsePart -import com.google.firebase.vertexai.type.InlineDataPart import com.google.firebase.vertexai.type.LiveContentResponse import com.google.firebase.vertexai.type.LiveSession +import com.google.firebase.vertexai.type.MediaData import com.google.firebase.vertexai.type.PublicPreviewAPI import com.google.firebase.vertexai.type.SessionAlreadyReceivingException import io.ktor.websocket.close @@ -90,10 +90,9 @@ public abstract class LiveSessionFutures internal constructor() { * * Calling this after 
[startAudioConversation] will play the response audio immediately. * - * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be - * sent. + * @param mediaChunks The list of [MediaData] instances representing the media data to be sent. */ - public abstract fun sendMediaStream(mediaChunks: List): ListenableFuture + public abstract fun sendMediaStream(mediaChunks: List): ListenableFuture /** * Sends [data][Content] to the model. @@ -150,7 +149,7 @@ public abstract class LiveSessionFutures internal constructor() { override fun sendFunctionResponse(functionList: List) = SuspendToFutureAdapter.launchFuture { session.sendFunctionResponse(functionList) } - override fun sendMediaStream(mediaChunks: List) = + override fun sendMediaStream(mediaChunks: List) = SuspendToFutureAdapter.launchFuture { session.sendMediaStream(mediaChunks) } @RequiresPermission(RECORD_AUDIO) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index afdbe36c562..5fceaf4e8f8 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -209,18 +209,14 @@ internal constructor( * * Calling this after [startAudioConversation] will play the response audio immediately. * - * @param mediaChunks The list of [InlineDataPart] instances representing the media data to be - * sent. + * @param mediaChunks The list of [MediaData] instances representing the media data to be sent. 
*/ public suspend fun sendMediaStream( - mediaChunks: List, + mediaChunks: List, ) { val jsonString = Json.encodeToString( - LiveClientRealtimeInputSetup( - mediaChunks.map { (it.toInternal() as InlineDataPart.Internal).inlineData } - ) - .toInternal() + LiveClientRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) session.send(Frame.Text(jsonString)) } @@ -269,7 +265,7 @@ internal constructor( ?.listenToRecording() ?.buffer(UNLIMITED) ?.accumulateUntil(MIN_BUFFER_SIZE) - ?.onEach { sendMediaStream(listOf(InlineDataPart(it, "audio/pcm"))) } + ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) } ?.launchIn(scope) } @@ -472,15 +468,11 @@ internal constructor( * * End of turn is derived from user activity (eg; end of speech). */ - internal class LiveClientRealtimeInputSetup( - val mediaChunks: List - ) { + internal class LiveClientRealtimeInputSetup(val mediaChunks: List) { @Serializable internal class Internal(val realtimeInput: LiveClientRealtimeInput) { @Serializable - internal data class LiveClientRealtimeInput( - val mediaChunks: List - ) + internal data class LiveClientRealtimeInput(val mediaChunks: List) } fun toInternal() = Internal(Internal.LiveClientRealtimeInput(mediaChunks)) } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/MediaData.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/MediaData.kt new file mode 100644 index 00000000000..7e58c9cf43c --- /dev/null +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/MediaData.kt @@ -0,0 +1,40 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.firebase.vertexai.type + +import android.util.Base64 +import kotlinx.serialization.Serializable + +/** + * Represents the media data to be sent to the server + * + * @param data Byte array representing the data to be sent. + * @param mimeType an IANA standard MIME type. For supported MIME type values see the + * [Firebase documentation](https://firebase.google.com/docs/vertex-ai/input-file-requirements). + */ +@PublicPreviewAPI +public class MediaData(public val data: ByteArray, public val mimeType: String) { + @Serializable + internal class Internal( + val data: String, + val mimeType: String, + ) + + internal fun toInternal(): Internal { + return Internal(Base64.encodeToString(data, BASE_64_FLAGS), mimeType) + } +} From d534ef023b453e9b1d041953a45dd944ab2bcc76 Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 14 Apr 2025 11:17:05 -0500 Subject: [PATCH 26/40] Update CHANGELOG.md --- firebase-vertexai/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md index 9ffeb7e9463..0b4a3f57ae7 100644 --- a/firebase-vertexai/CHANGELOG.md +++ b/firebase-vertexai/CHANGELOG.md @@ -3,6 +3,8 @@ [model documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-filters#how_to_configure_content_filters){: .external} for more information. * [fixed] Improved thread usage when using a `LiveGenerativeModel`. 
(#6870) +* [fixed] Fixed an issue with `LiveContentResponse` audio data not being present when the model was + interrupted or the turn completed. (#6870) # 16.3.0 From 97d46cb29f2c862fd0eee2701c8d09a6cceb9633 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 11:13:48 -0500 Subject: [PATCH 27/40] Use ByteArrayOutputStream --- .../google/firebase/vertexai/common/util/kotlin.kt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt index e459a5cfe12..ff9f57bf5a9 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt @@ -16,6 +16,7 @@ package com.google.firebase.vertexai.common.util +import java.io.ByteArrayOutputStream import java.lang.reflect.Field import kotlin.coroutines.EmptyCoroutineContext import kotlinx.coroutines.CoroutineScope @@ -66,17 +67,17 @@ internal fun Flow.accumulateUntil( minSize: Int, emitLeftOvers: Boolean = false ): Flow = flow { - val buffer = mutableListOf() + val buffer = ByteArrayOutputStream() collect { - buffer.addAll(it.asSequence()) - if (buffer.size >= minSize) { + buffer.write(it, 0, it.size) + if (buffer.size() >= minSize) { emit(buffer.toByteArray()) - buffer.clear() + buffer.reset() } } - if (emitLeftOvers && buffer.isNotEmpty()) { + if (emitLeftOvers && buffer.size() > 0) { emit(buffer.toByteArray()) } } From 9fae0b829e0762652e69bee46ad597c46b463462 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 11:41:31 -0500 Subject: [PATCH 28/40] Add catching for exceptions --- firebase-vertexai/CHANGELOG.md | 1 + .../firebase/vertexai/type/Exceptions.kt | 32 ++++ .../firebase/vertexai/type/LiveSession.kt | 139 ++++++++++-------- 3 files changed, 112 insertions(+), 60 deletions(-) diff --git 
a/firebase-vertexai/CHANGELOG.md b/firebase-vertexai/CHANGELOG.md index 0b4a3f57ae7..77dcbee8a0c 100644 --- a/firebase-vertexai/CHANGELOG.md +++ b/firebase-vertexai/CHANGELOG.md @@ -5,6 +5,7 @@ * [fixed] Improved thread usage when using a `LiveGenerativeModel`. (#6870) * [fixed] Fixed an issue with `LiveContentResponse` audio data not being present when the model was interrupted or the turn completed. (#6870) +* [fixed] Fixed an issue with `LiveSession` not converting exceptions to `FirebaseVertexAIException`. (#6870) # 16.3.0 diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Exceptions.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Exceptions.kt index f3256bf4c15..45e9ef027a6 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Exceptions.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/Exceptions.kt @@ -67,6 +67,38 @@ internal constructor(message: String, cause: Throwable? = null) : RuntimeExcepti RequestTimeoutException("The request failed to complete in the allotted time.") else -> UnknownException("Something unexpected happened.", cause) } + + /** + * Catch any exception thrown in the [callback] block and rethrow it as a + * [FirebaseVertexAIException]. + * + * Will return whatever the [callback] returns as well. + * + * @see catch + */ + internal suspend fun catchAsync(callback: suspend () -> T): T { + try { + return callback() + } catch (e: Exception) { + throw from(e) + } + } + + /** + * Catch any exception thrown in the [callback] block and rethrow it as a + * [FirebaseVertexAIException]. + * + * Will return whatever the [callback] returns as well. 
+ * + * @see catchAsync + */ + internal fun catch(callback: () -> T): T { + try { + return callback() + } catch (e: Exception) { + throw from(e) + } + } } } diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index 5fceaf4e8f8..c3fd68410ca 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -38,6 +38,7 @@ import kotlinx.coroutines.cancel import kotlinx.coroutines.channels.Channel.Factory.UNLIMITED import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.buffer +import kotlinx.coroutines.flow.catch import kotlinx.coroutines.flow.flow import kotlinx.coroutines.flow.launchIn import kotlinx.coroutines.flow.onCompletion @@ -95,21 +96,23 @@ internal constructor( public suspend fun startAudioConversation( functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null ) { - if (scope.isActive) { - Log.w( - TAG, - "startAudioConversation called after the recording has already started. " + - "Call stopAudioConversation to close the previous connection." - ) - return - } + FirebaseVertexAIException.catchAsync { + if (scope.isActive) { + Log.w( + TAG, + "startAudioConversation called after the recording has already started. " + + "Call stopAudioConversation to close the previous connection." 
+ ) + return@catchAsync + } - scope = CoroutineScope(blockingDispatcher + childJob()) - audioHelper = AudioHelper.build() + scope = CoroutineScope(blockingDispatcher + childJob()) + audioHelper = AudioHelper.build() - recordUserAudio() - processModelResponses(functionCallHandler) - listenForModelPlayback() + recordUserAudio() + processModelResponses(functionCallHandler) + listenForModelPlayback() + } } /** @@ -120,13 +123,15 @@ internal constructor( * If there is no audio conversation currently active, this function does nothing. */ public fun stopAudioConversation() { - if (!startedReceiving.getAndSet(false)) return + FirebaseVertexAIException.catch { + if (!startedReceiving.getAndSet(false)) return@catch - scope.cancel() - playBackQueue.clear() + scope.cancel() + playBackQueue.clear() - audioHelper?.release() - audioHelper = null + audioHelper?.release() + audioHelper = null + } } /** @@ -140,31 +145,34 @@ internal constructor( * @see stopReceiving */ public fun receive(): Flow { - if (startedReceiving.getAndSet(true)) { - throw SessionAlreadyReceivingException() - } + return FirebaseVertexAIException.catch { + if (startedReceiving.getAndSet(true)) { + throw SessionAlreadyReceivingException() + } - // TODO(b/410059569): Remove when fixed - return flow { - while (true) { - val response = session.incoming.tryReceive() - if (response.isClosed || !startedReceiving.get()) break + // TODO(b/410059569): Remove when fixed + flow { + while (true) { + val response = session.incoming.tryReceive() + if (response.isClosed || !startedReceiving.get()) break - val frame = response.getOrNull() - frame?.let { frameToLiveContentResponse(it) }?.let { emit(it) } + val frame = response.getOrNull() + frame?.let { frameToLiveContentResponse(it) }?.let { emit(it) } - yield() + yield() + } } - } - .onCompletion { stopAudioConversation() } - - // TODO(b/410059569): Add back when fixed - // return session.incoming.receiveAsFlow().transform { frame -> - // val response = 
frameToLiveContentResponse(frame) - // response?.let { emit(it) } - // }.onCompletion { - // stopAudioConversation() - // } + .onCompletion { stopAudioConversation() } + .catch { throw FirebaseVertexAIException.from(it) } + + // TODO(b/410059569): Add back when fixed + // return session.incoming.receiveAsFlow().transform { frame -> + // val response = frameToLiveContentResponse(frame) + // response?.let { emit(it) } + // }.onCompletion { + // stopAudioConversation() + // }.catch { throw FirebaseVertexAIException.from(it) } + } } /** @@ -181,13 +189,15 @@ internal constructor( */ // TODO(b/410059569): Remove when fixed public fun stopReceiving() { - if (!startedReceiving.getAndSet(false)) return + FirebaseVertexAIException.catch { + if (!startedReceiving.getAndSet(false)) return@catch - scope.cancel() - playBackQueue.clear() + scope.cancel() + playBackQueue.clear() - audioHelper?.release() - audioHelper = null + audioHelper?.release() + audioHelper = null + } } /** @@ -197,11 +207,13 @@ internal constructor( * response from the client. 
*/ public suspend fun sendFunctionResponse(functionList: List) { - val jsonString = - Json.encodeToString( - LiveToolResponseSetup(functionList.map { it.toInternalFunctionCall() }).toInternal() - ) - session.send(Frame.Text(jsonString)) + FirebaseVertexAIException.catchAsync { + val jsonString = + Json.encodeToString( + LiveToolResponseSetup(functionList.map { it.toInternalFunctionCall() }).toInternal() + ) + session.send(Frame.Text(jsonString)) + } } /** @@ -214,11 +226,13 @@ internal constructor( public suspend fun sendMediaStream( mediaChunks: List, ) { - val jsonString = - Json.encodeToString( - LiveClientRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() - ) - session.send(Frame.Text(jsonString)) + FirebaseVertexAIException.catchAsync { + val jsonString = + Json.encodeToString( + LiveClientRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() + ) + session.send(Frame.Text(jsonString)) + } } /** @@ -229,9 +243,11 @@ internal constructor( * @param content Client [Content] to be sent to the model. */ public suspend fun send(content: Content) { - val jsonString = - Json.encodeToString(LiveClientContentSetup(listOf(content.toInternal()), true).toInternal()) - session.send(Frame.Text(jsonString)) + FirebaseVertexAIException.catchAsync { + val jsonString = + Json.encodeToString(LiveClientContentSetup(listOf(content.toInternal()), true).toInternal()) + session.send(Frame.Text(jsonString)) + } } /** @@ -242,7 +258,7 @@ internal constructor( * @param text Text to be sent to the model. 
*/ public suspend fun send(text: String) { - send(Content.Builder().text(text).build()) + FirebaseVertexAIException.catchAsync { send(Content.Builder().text(text).build()) } } /** @@ -254,8 +270,10 @@ internal constructor( * @see stopReceiving */ public suspend fun close() { - session.close() - stopAudioConversation() + FirebaseVertexAIException.catchAsync { + session.close() + stopAudioConversation() + } } /** Listen to the user's microphone and send the data to the model. */ @@ -266,6 +284,7 @@ internal constructor( ?.buffer(UNLIMITED) ?.accumulateUntil(MIN_BUFFER_SIZE) ?.onEach { sendMediaStream(listOf(MediaData(it, "audio/pcm"))) } + ?.catch { throw FirebaseVertexAIException.from(it) } ?.launchIn(scope) } From 0df186c35daaabbcd4c621ff5f7bf4fee233e6ed Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 11:41:52 -0500 Subject: [PATCH 29/40] Handle the return value on write to playback track --- .../firebase/vertexai/type/AudioHelper.kt | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt index 6e14c23bc44..b4e934b0114 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt @@ -24,6 +24,7 @@ import android.media.AudioRecord import android.media.AudioTrack import android.media.MediaRecorder import android.media.audiofx.AcousticEchoCanceler +import android.util.Log import androidx.annotation.RequiresPermission import com.google.firebase.vertexai.common.util.readAsFlow import kotlinx.coroutines.flow.Flow @@ -67,8 +68,31 @@ internal class AudioHelper( */ fun playAudio(data: ByteArray) { if (released) return + if (data.isEmpty()) return + + val result = playbackTrack.write(data, 0, data.size) + if (result > 0) return + if (result == 0) 
{ + Log.w( + TAG, + "Failed to write any audio bytes to the playback track. The audio track may have been stopped or paused." + ) + return + } - playbackTrack.write(data, 0, data.size) + // ERROR_INVALID_OPERATION and ERROR_BAD_VALUE should never occur + when (result) { + AudioTrack.ERROR_INVALID_OPERATION -> + throw IllegalStateException("The playback track was not properly initialized.") + AudioTrack.ERROR_BAD_VALUE -> + throw IllegalArgumentException("Playback data is somehow invalid.") + AudioTrack.ERROR_DEAD_OBJECT -> { + Log.w(TAG, "Attempted to playback some audio, but the track has been released.") + release() + } + AudioTrack.ERROR -> + throw RuntimeException("Failed to play the audio data for some unknown reason.") + } } /** @@ -111,6 +135,8 @@ internal class AudioHelper( } companion object { + private val TAG = AudioHelper::class.simpleName + /** * Creates an instance of [AudioHelper] with the track and record initialized. * From b90e10dcf1192d0872c8c13b22b5358813b976fc Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 11:46:05 -0500 Subject: [PATCH 30/40] Add note about startAudioConversation and sendFunctionResponse --- .../kotlin/com/google/firebase/vertexai/type/LiveSession.kt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index c3fd68410ca..32adc0ddf65 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -90,7 +90,8 @@ internal constructor( * [stopAudioConversation] or [close]. * * @param functionCallHandler A callback function that is invoked whenever the model receives a - * function call. + * function call. The [FunctionResponsePart] that the callback function returns will be + * automatically sent to the model. 
*/ @RequiresPermission(RECORD_AUDIO) public suspend fun startAudioConversation( @@ -203,6 +204,9 @@ internal constructor( /** * Sends function calling responses to the model. * + * **NOTE:** If you're using [startAudioConversation], the method will handle sending function + * responses to the model for you. You do _not_ need to call this method in that case. + * * @param functionList The list of [FunctionResponsePart] instances indicating the function * response from the client. */ From f40dcfa55ece789aef4280c20463b71f68922bd0 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 11:49:48 -0500 Subject: [PATCH 31/40] Catch recorder.stop exception --- .../com/google/firebase/vertexai/type/AudioHelper.kt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt index b4e934b0114..4d7bf6f321b 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt @@ -105,7 +105,12 @@ internal class AudioHelper( fun pauseRecording() { if (released || recorder.recordingState == AudioRecord.RECORDSTATE_STOPPED) return - recorder.stop() + try { + recorder.stop() + } catch (e: IllegalStateException) { + release() + throw IllegalStateException("The playback track was not properly initialized.") + } } /** From 795dcfe6db0b5efb4b7a22554db1e04aeae305c4 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 11:49:59 -0500 Subject: [PATCH 32/40] Add docs for exceptions thrown --- .../com/google/firebase/vertexai/type/AudioHelper.kt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt index 
4d7bf6f321b..a4027c96550 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt @@ -65,6 +65,10 @@ internal class AudioHelper( * Play the provided audio data on the playback track. * * Does nothing if this [AudioHelper] has been [released][release]. + * + * @throws IllegalStateException If the playback track was not properly initialized. + * @throws IllegalArgumentException If the playback data is invalid. + * @throws RuntimeException If we fail to play the audio data for some unknown reason. */ fun playAudio(data: ByteArray) { if (released) return @@ -88,7 +92,7 @@ internal class AudioHelper( throw IllegalArgumentException("Playback data is somehow invalid.") AudioTrack.ERROR_DEAD_OBJECT -> { Log.w(TAG, "Attempted to playback some audio, but the track has been released.") - release() + release() // to ensure `released` is set and `record` is released too } AudioTrack.ERROR -> throw RuntimeException("Failed to play the audio data for some unknown reason.") @@ -101,6 +105,8 @@ internal class AudioHelper( * Does nothing if this [AudioHelper] has been [released][release]. * * @see resumeRecording + * + * @throws IllegalStateException If the playback track was not properly initialized. 
*/ fun pauseRecording() { if (released || recorder.recordingState == AudioRecord.RECORDSTATE_STOPPED) return From e6475c71033b6916df9a677f1eaf2bf827ba016a Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 11:55:55 -0500 Subject: [PATCH 33/40] Use a fold instead of a collect --- .../firebase/vertexai/common/util/kotlin.kt | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt index ff9f57bf5a9..05e37e490ab 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/kotlin.kt @@ -25,6 +25,7 @@ import kotlinx.coroutines.cancel import kotlinx.coroutines.currentCoroutineContext import kotlinx.coroutines.flow.Flow import kotlinx.coroutines.flow.flow +import kotlinx.coroutines.flow.fold /** * Removes the last character from the [StringBuilder]. 
@@ -67,18 +68,19 @@ internal fun Flow.accumulateUntil( minSize: Int, emitLeftOvers: Boolean = false ): Flow = flow { - val buffer = ByteArrayOutputStream() - - collect { - buffer.write(it, 0, it.size) - if (buffer.size() >= minSize) { - emit(buffer.toByteArray()) - buffer.reset() + val remaining = + fold(ByteArrayOutputStream()) { buffer, it -> + buffer.apply { + write(it, 0, it.size) + if (size() >= minSize) { + emit(toByteArray()) + reset() + } + } } - } - if (emitLeftOvers && buffer.size() > 0) { - emit(buffer.toByteArray()) + if (emitLeftOvers && remaining.size() > 0) { + emit(remaining.toByteArray()) } } From 2e2892ab9245e1736bd04da054bc796b84a706e1 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 13:53:27 -0500 Subject: [PATCH 34/40] Update AudioHelper.kt --- .../kotlin/com/google/firebase/vertexai/type/AudioHelper.kt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt index a4027c96550..e74b766d9ce 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/AudioHelper.kt @@ -74,6 +74,8 @@ internal class AudioHelper( if (released) return if (data.isEmpty()) return + if (playbackTrack.playState == AudioTrack.PLAYSTATE_STOPPED) playbackTrack.play() + val result = playbackTrack.write(data, 0, data.size) if (result > 0) return if (result == 0) { @@ -176,8 +178,6 @@ internal class AudioHelper( AudioManager.AUDIO_SESSION_ID_GENERATE ) - playbackTrack.play() - val bufferSize = AudioRecord.getMinBufferSize( 16000, From 87f1ffa2cbce6e18e92e7a3c9be400f14760adc3 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 14:12:59 -0500 Subject: [PATCH 35/40] Update android.kt --- .../kotlin/com/google/firebase/vertexai/common/util/android.kt | 3 --- 1 file changed, 
3 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt index 982104c7a0c..023c38189bf 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt @@ -35,7 +35,6 @@ internal val AudioRecord.minBufferSize: Int */ internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) - val emptyBuffer = ByteArray(minBufferSize) while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { @@ -46,8 +45,6 @@ internal fun AudioRecord.readAsFlow() = flow { val bytesRead = read(buffer, 0, buffer.size) if (bytesRead > 0) { emit(buffer.copyOf(bytesRead)) - } else { - emit(emptyBuffer) } } } From 1ff16187e6e9ac12e7bd271745e4785c9a4e9a2a Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 14:20:32 -0500 Subject: [PATCH 36/40] Update LiveSession.kt --- .../firebase/vertexai/type/LiveSession.kt | 60 +++++++++++-------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt index 32adc0ddf65..30bd92c6043 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/type/LiveSession.kt @@ -214,7 +214,8 @@ internal constructor( FirebaseVertexAIException.catchAsync { val jsonString = Json.encodeToString( - LiveToolResponseSetup(functionList.map { it.toInternalFunctionCall() }).toInternal() + BidiGenerateContentToolResponseSetup(functionList.map { it.toInternalFunctionCall() }) + .toInternal() ) session.send(Frame.Text(jsonString)) } @@ -233,7 +234,7 @@ internal constructor( 
FirebaseVertexAIException.catchAsync { val jsonString = Json.encodeToString( - LiveClientRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() + BidiGenerateContentRealtimeInputSetup(mediaChunks.map { (it.toInternal()) }).toInternal() ) session.send(Frame.Text(jsonString)) } @@ -249,7 +250,9 @@ internal constructor( public suspend fun send(content: Content) { FirebaseVertexAIException.catchAsync { val jsonString = - Json.encodeToString(LiveClientContentSetup(listOf(content.toInternal()), true).toInternal()) + Json.encodeToString( + BidiGenerateContentClientContentSetup(listOf(content.toInternal()), true).toInternal() + ) session.send(Frame.Text(jsonString)) } } @@ -380,7 +383,8 @@ internal constructor( return when { "toolCall" in jsonMessage -> { - val functionContent = JSON.decodeFromJsonElement(jsonMessage) + val functionContent = + JSON.decodeFromJsonElement(jsonMessage) LiveContentResponse( null, LiveContentResponse.Status.NORMAL, @@ -391,7 +395,8 @@ internal constructor( } "serverContent" in jsonMessage -> { val serverContent = - JSON.decodeFromJsonElement(jsonMessage).serverContent + JSON.decodeFromJsonElement(jsonMessage) + .serverContent val status = when { serverContent.turnComplete == true -> LiveContentResponse.Status.TURN_COMPLETE @@ -412,20 +417,20 @@ internal constructor( * * Effectively, a message from the client to the model. 
*/ - internal class LiveClientContentSetup( + internal class BidiGenerateContentClientContentSetup( val turns: List, val turnComplete: Boolean ) { @Serializable - internal class Internal(val clientContent: LiveClientContent) { + internal class Internal(val clientContent: BidiGenerateContentClientContent) { @Serializable - internal data class LiveClientContent( + internal data class BidiGenerateContentClientContent( val turns: List, val turnComplete: Boolean ) } - fun toInternal() = Internal(Internal.LiveClientContent(turns, turnComplete)) + fun toInternal() = Internal(Internal.BidiGenerateContentClientContent(turns, turnComplete)) } /** @@ -433,57 +438,58 @@ internal constructor( * * Effectively, a message from the model to the client. */ - internal class LiveServerContentSetup( + internal class BidiGenerateContentServerContentSetup( val modelTurn: Content.Internal?, val turnComplete: Boolean?, val interrupted: Boolean? ) { @Serializable - internal class Internal(val serverContent: LiveServerContent) { + internal class Internal(val serverContent: BidiGenerateContentServerContent) { @Serializable - internal data class LiveServerContent( + internal data class BidiGenerateContentServerContent( val modelTurn: Content.Internal?, val turnComplete: Boolean?, val interrupted: Boolean? ) } - fun toInternal() = Internal(Internal.LiveServerContent(modelTurn, turnComplete, interrupted)) + fun toInternal() = + Internal(Internal.BidiGenerateContentServerContent(modelTurn, turnComplete, interrupted)) } /** * Request for the client to execute the provided function calls and return the responses with the * matched `id`s. 
*/ - internal data class LiveServerToolCall( + internal data class BidiGenerateContentToolCallSetup( val functionCalls: List ) { @Serializable - internal class Internal(val toolCall: LiveServerToolCall) { + internal class Internal(val toolCall: BidiGenerateContentToolCall) { @Serializable - internal data class LiveServerToolCall( + internal data class BidiGenerateContentToolCall( val functionCalls: List ) } fun toInternal(): Internal { - return Internal(Internal.LiveServerToolCall(functionCalls)) + return Internal(Internal.BidiGenerateContentToolCall(functionCalls)) } } - /** Client generated responses to a [LiveServerToolCall]. */ - internal class LiveToolResponseSetup( + /** Client generated responses to a [BidiGenerateContentToolCallSetup]. */ + internal class BidiGenerateContentToolResponseSetup( val functionResponses: List ) { @Serializable - internal data class Internal(val toolResponse: LiveToolResponse) { + internal data class Internal(val toolResponse: BidiGenerateContentToolResponse) { @Serializable - internal data class LiveToolResponse( + internal data class BidiGenerateContentToolResponse( val functionResponses: List ) } - fun toInternal() = Internal(Internal.LiveToolResponse(functionResponses)) + fun toInternal() = Internal(Internal.BidiGenerateContentToolResponse(functionResponses)) } /** @@ -491,13 +497,15 @@ internal constructor( * * End of turn is derived from user activity (eg; end of speech). 
*/ - internal class LiveClientRealtimeInputSetup(val mediaChunks: List) { + internal class BidiGenerateContentRealtimeInputSetup(val mediaChunks: List) { @Serializable - internal class Internal(val realtimeInput: LiveClientRealtimeInput) { + internal class Internal(val realtimeInput: BidiGenerateContentRealtimeInput) { @Serializable - internal data class LiveClientRealtimeInput(val mediaChunks: List) + internal data class BidiGenerateContentRealtimeInput( + val mediaChunks: List + ) } - fun toInternal() = Internal(Internal.LiveClientRealtimeInput(mediaChunks)) + fun toInternal() = Internal(Internal.BidiGenerateContentRealtimeInput(mediaChunks)) } private companion object { From 644a50b7892496eb2c7486ea8a077aa8ecf4f2ba Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 14:32:07 -0500 Subject: [PATCH 37/40] Update android.kt --- .../com/google/firebase/vertexai/common/util/android.kt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt index 023c38189bf..fdeb975593a 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt @@ -17,8 +17,8 @@ package com.google.firebase.vertexai.common.util import android.media.AudioRecord +import kotlinx.coroutines.delay import kotlinx.coroutines.flow.flow -import kotlinx.coroutines.yield /** * The minimum buffer size for this instance. @@ -31,14 +31,17 @@ internal val AudioRecord.minBufferSize: Int /** * Reads from this [AudioRecord] and returns the data in a flow. * - * Will emit a zeroed out buffer when this instance is not recording. + * Will emit a zeroed out buffer every 100ms when this instance is not recording. 
*/ internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) + val emptyBuffer = ByteArray(minBufferSize) while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { - yield() + emit(emptyBuffer) + // The model will close the connection if you spam it too fast + delay(100) continue } From 8f199ef5eacddcb8eccc1a72d5f4342608cb0975 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 14:43:15 -0500 Subject: [PATCH 38/40] Revert "Update android.kt" This reverts commit 644a50b7892496eb2c7486ea8a077aa8ecf4f2ba. --- .../com/google/firebase/vertexai/common/util/android.kt | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt index fdeb975593a..023c38189bf 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt @@ -17,8 +17,8 @@ package com.google.firebase.vertexai.common.util import android.media.AudioRecord -import kotlinx.coroutines.delay import kotlinx.coroutines.flow.flow +import kotlinx.coroutines.yield /** * The minimum buffer size for this instance. @@ -31,17 +31,14 @@ internal val AudioRecord.minBufferSize: Int /** * Reads from this [AudioRecord] and returns the data in a flow. * - * Will emit a zeroed out buffer every 100ms when this instance is not recording. + * Will emit a zeroed out buffer when this instance is not recording. 
*/ internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) - val emptyBuffer = ByteArray(minBufferSize) while (true) { if (recordingState != AudioRecord.RECORDSTATE_RECORDING) { - emit(emptyBuffer) - // The model will close the connection if you spam it too fast - delay(100) + yield() continue } From e38febd031e4f6d4aeaacbf2138b748fadf23dd8 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 14:43:45 -0500 Subject: [PATCH 39/40] Update android.kt --- .../kotlin/com/google/firebase/vertexai/common/util/android.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt index 023c38189bf..6de0339e032 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/common/util/android.kt @@ -31,7 +31,7 @@ internal val AudioRecord.minBufferSize: Int /** * Reads from this [AudioRecord] and returns the data in a flow. * - * Will emit a zeroed out buffer when this instance is not recording. + * Will yield when this instance is not recording. 
*/ internal fun AudioRecord.readAsFlow() = flow { val buffer = ByteArray(minBufferSize) From ab359fbcfdf0d9ec8418223b83583ae91520456b Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 15 Apr 2025 14:47:19 -0500 Subject: [PATCH 40/40] Update LiveSessionFutures.kt --- .../com/google/firebase/vertexai/java/LiveSessionFutures.kt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt index a4318ecb653..169f9723ad8 100644 --- a/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt +++ b/firebase-vertexai/src/main/kotlin/com/google/firebase/vertexai/java/LiveSessionFutures.kt @@ -55,6 +55,7 @@ public abstract class LiveSessionFutures internal constructor() { * Starts an audio conversation with the model, which can only be stopped using * [stopAudioConversation]. */ + @RequiresPermission(RECORD_AUDIO) public abstract fun startAudioConversation(): ListenableFuture /** @@ -64,6 +65,7 @@ public abstract class LiveSessionFutures internal constructor() { * * If there is no audio conversation currently active, this function does nothing. */ + @RequiresPermission(RECORD_AUDIO) public abstract fun stopAudioConversation(): ListenableFuture /** @@ -163,6 +165,7 @@ public abstract class LiveSessionFutures internal constructor() { functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? ) = SuspendToFutureAdapter.launchFuture { session.startAudioConversation(functionCallHandler) } + @RequiresPermission(RECORD_AUDIO) override fun startAudioConversation() = SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }