Skip to content

Commit 6c971b1

Browse files
authored
Merge branch 'main' into ep/unit-test-report
2 parents 9a34275 + 544b1a2 commit 6c971b1

37 files changed

+925
-179
lines changed

firebase-ai/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Unreleased
22

3+
- [changed] Added better scheduling and louder output for Live API.
4+
- [changed] Added support for input and output transcription. (#7482)
5+
- [feature] Added support for sending realtime audio and video in a `LiveSession`.
36
- [changed] Removed redundant internal exception types. (#7475)
47

58
# 17.4.0

firebase-ai/api.txt

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,18 @@ package com.google.firebase.ai.java {
145145
method public abstract org.reactivestreams.Publisher<com.google.firebase.ai.type.LiveServerMessage> receive();
146146
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> send(com.google.firebase.ai.type.Content content);
147147
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> send(String text);
148+
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendAudioRealtime(com.google.firebase.ai.type.InlineData audio);
148149
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList);
149-
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks);
150+
method @Deprecated public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks);
151+
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendTextRealtime(String text);
152+
method public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> sendVideoRealtime(com.google.firebase.ai.type.InlineData video);
150153
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation();
151154
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(boolean enableInterruptions);
152155
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler);
153156
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, boolean enableInterruptions);
157+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler, boolean enableInterruptions);
158+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler);
159+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> startAudioConversation(kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler, boolean enableInterruptions);
154160
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture<kotlin.Unit> stopAudioConversation();
155161
method public abstract void stopReceiving();
156162
field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion;
@@ -171,6 +177,10 @@ package com.google.firebase.ai.type {
171177
ctor public AudioRecordInitializationFailedException(String message);
172178
}
173179

180+
public final class AudioTranscriptionConfig {
181+
ctor public AudioTranscriptionConfig();
182+
}
183+
174184
public final class BlockReason {
175185
method public String getName();
176186
method public int getOrdinal();
@@ -801,6 +811,14 @@ package com.google.firebase.ai.type {
801811
public static final class ImagenSubjectReferenceType.Companion {
802812
}
803813

814+
public final class InlineData {
815+
ctor public InlineData(byte[] data, String mimeType);
816+
method public byte[] getData();
817+
method public String getMimeType();
818+
property public final byte[] data;
819+
property public final String mimeType;
820+
}
821+
804822
public final class InlineDataPart implements com.google.firebase.ai.type.Part {
805823
ctor public InlineDataPart(byte[] inlineData, String mimeType);
806824
method public byte[] getInlineData();
@@ -828,15 +846,19 @@ package com.google.firebase.ai.type {
828846
ctor public LiveGenerationConfig.Builder();
829847
method public com.google.firebase.ai.type.LiveGenerationConfig build();
830848
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setFrequencyPenalty(Float? frequencyPenalty);
849+
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setInputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
831850
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setMaxOutputTokens(Integer? maxOutputTokens);
851+
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setOutputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
832852
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setPresencePenalty(Float? presencePenalty);
833853
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setResponseModality(com.google.firebase.ai.type.ResponseModality? responseModality);
834854
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setSpeechConfig(com.google.firebase.ai.type.SpeechConfig? speechConfig);
835855
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTemperature(Float? temperature);
836856
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopK(Integer? topK);
837857
method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopP(Float? topP);
838858
field public Float? frequencyPenalty;
859+
field public com.google.firebase.ai.type.AudioTranscriptionConfig? inputAudioTranscription;
839860
field public Integer? maxOutputTokens;
861+
field public com.google.firebase.ai.type.AudioTranscriptionConfig? outputAudioTranscription;
840862
field public Float? presencePenalty;
841863
field public com.google.firebase.ai.type.ResponseModality? responseModality;
842864
field public com.google.firebase.ai.type.SpeechConfig? speechConfig;
@@ -854,14 +876,18 @@ package com.google.firebase.ai.type {
854876
}
855877

856878
@com.google.firebase.ai.type.PublicPreviewAPI public final class LiveServerContent implements com.google.firebase.ai.type.LiveServerMessage {
857-
ctor public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete);
879+
ctor @Deprecated public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete, com.google.firebase.ai.type.Transcription? inputTranscription, com.google.firebase.ai.type.Transcription? outputTranscription);
858880
method public com.google.firebase.ai.type.Content? getContent();
859881
method public boolean getGenerationComplete();
882+
method public com.google.firebase.ai.type.Transcription? getInputTranscription();
860883
method public boolean getInterrupted();
884+
method public com.google.firebase.ai.type.Transcription? getOutputTranscription();
861885
method public boolean getTurnComplete();
862886
property public final com.google.firebase.ai.type.Content? content;
863887
property public final boolean generationComplete;
888+
property public final com.google.firebase.ai.type.Transcription? inputTranscription;
864889
property public final boolean interrupted;
890+
property public final com.google.firebase.ai.type.Transcription? outputTranscription;
865891
property public final boolean turnComplete;
866892
}
867893

@@ -891,20 +917,24 @@ package com.google.firebase.ai.type {
891917
method public kotlinx.coroutines.flow.Flow<com.google.firebase.ai.type.LiveServerMessage> receive();
892918
method public suspend Object? send(com.google.firebase.ai.type.Content content, kotlin.coroutines.Continuation<? super kotlin.Unit>);
893919
method public suspend Object? send(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
920+
method public suspend Object? sendAudioRealtime(com.google.firebase.ai.type.InlineData audio, kotlin.coroutines.Continuation<? super kotlin.Unit>);
894921
method public suspend Object? sendFunctionResponse(java.util.List<com.google.firebase.ai.type.FunctionResponsePart> functionList, kotlin.coroutines.Continuation<? super kotlin.Unit>);
895-
method public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
922+
method @Deprecated public suspend Object? sendMediaStream(java.util.List<com.google.firebase.ai.type.MediaData> mediaChunks, kotlin.coroutines.Continuation<? super kotlin.Unit>);
923+
method public suspend Object? sendTextRealtime(String text, kotlin.coroutines.Continuation<? super kotlin.Unit>);
924+
method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation<? super kotlin.Unit>);
896925
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
897926
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.coroutines.Continuation<? super kotlin.Unit>);
927+
method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1<? super com.google.firebase.ai.type.FunctionCallPart,com.google.firebase.ai.type.FunctionResponsePart>? functionCallHandler = null, kotlin.jvm.functions.Function2<? super com.google.firebase.ai.type.Transcription?,? super com.google.firebase.ai.type.Transcription?,kotlin.Unit>? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation<? super kotlin.Unit>);
898928
method public void stopAudioConversation();
899929
method public void stopReceiving();
900930
}
901931

902-
@com.google.firebase.ai.type.PublicPreviewAPI public final class MediaData {
903-
ctor public MediaData(byte[] data, String mimeType);
904-
method public byte[] getData();
905-
method public String getMimeType();
906-
property public final byte[] data;
907-
property public final String mimeType;
932+
@Deprecated @com.google.firebase.ai.type.PublicPreviewAPI public final class MediaData {
933+
ctor @Deprecated public MediaData(byte[] data, String mimeType);
934+
method @Deprecated public byte[] getData();
935+
method @Deprecated public String getMimeType();
936+
property @Deprecated public final byte[] data;
937+
property @Deprecated public final String mimeType;
908938
}
909939

910940
public final class ModalityTokenCount {
@@ -1221,6 +1251,11 @@ package com.google.firebase.ai.type {
12211251
ctor public ToolConfig(com.google.firebase.ai.type.FunctionCallingConfig? functionCallingConfig);
12221252
}
12231253

1254+
public final class Transcription {
1255+
method public String? getText();
1256+
property public final String? text;
1257+
}
1258+
12241259
public final class UnknownException extends com.google.firebase.ai.type.FirebaseAIException {
12251260
}
12261261

firebase-ai/gradle.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
version=17.4.1
15+
version=17.5.0
1616
latestReleasedVersion=17.4.0

firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,9 @@ internal constructor(
111111
modelName,
112112
config?.toInternal(),
113113
tools?.map { it.toInternal() },
114-
systemInstruction?.toInternal()
114+
systemInstruction?.toInternal(),
115+
config?.inputAudioTranscription?.toInternal(),
116+
config?.outputAudioTranscription?.toInternal()
115117
)
116118
.toInternal()
117119
val data: String = Json.encodeToString(clientMessage)
@@ -135,7 +137,7 @@ internal constructor(
135137
} catch (e: ClosedReceiveChannelException) {
136138
val reason = webSession?.closeReason?.await()
137139
val message =
138-
"Channel was closed by the server.${if(reason!=null) " Details: ${reason.message}" else "" }"
140+
"Channel was closed by the server.${if (reason != null) " Details: ${reason.message}" else ""}"
139141
throw ServiceConnectionHandshakeFailedException(message, e)
140142
}
141143
}

firebase-ai/src/main/kotlin/com/google/firebase/ai/common/APIController.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ import kotlinx.coroutines.flow.map
7777
import kotlinx.coroutines.launch
7878
import kotlinx.coroutines.withTimeout
7979
import kotlinx.serialization.ExperimentalSerializationApi
80+
import kotlinx.serialization.json.ClassDiscriminatorMode
8081
import kotlinx.serialization.json.Json
8182

8283
@OptIn(ExperimentalSerializationApi::class)
@@ -85,6 +86,7 @@ internal val JSON = Json {
8586
prettyPrint = false
8687
isLenient = true
8788
explicitNulls = false
89+
classDiscriminatorMode = ClassDiscriminatorMode.NONE
8890
}
8991

9092
/**

firebase-ai/src/main/kotlin/com/google/firebase/ai/common/util/android.kt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,8 @@
1717
package com.google.firebase.ai.common.util
1818

1919
import android.media.AudioRecord
20-
import kotlin.time.Duration.Companion.milliseconds
2120
import kotlinx.coroutines.delay
2221
import kotlinx.coroutines.flow.flow
23-
import kotlinx.coroutines.yield
2422

2523
/**
2624
* The minimum buffer size for this instance.
@@ -40,15 +38,17 @@ internal fun AudioRecord.readAsFlow() = flow {
4038

4139
while (true) {
4240
if (recordingState != AudioRecord.RECORDSTATE_RECORDING) {
43-
// TODO(vguthal): Investigate if both yield and delay are required.
44-
delay(10.milliseconds)
45-
yield()
41+
// delay uses a different scheduler in the backend, so it's "stickier" in its enforcement when
42+
// compared to yield.
43+
delay(0)
4644
continue
4745
}
4846
val bytesRead = read(buffer, 0, buffer.size)
4947
if (bytesRead > 0) {
5048
emit(buffer.copyOf(bytesRead))
5149
}
52-
yield()
50+
// delay uses a different scheduler in the backend, so it's "stickier" in its enforcement when
51+
// compared to yield.
52+
delay(0)
5353
}
5454
}

0 commit comments

Comments (0)