diff --git a/.changeset/lazy-donuts-agree.md b/.changeset/lazy-donuts-agree.md new file mode 100644 index 0000000000..f2baca4dca --- /dev/null +++ b/.changeset/lazy-donuts-agree.md @@ -0,0 +1,9 @@ +--- +'firebase': minor +'@firebase/ai': minor +--- + +Added a `sendFunctionResponses` method to `LiveSession`, allowing function responses to be sent during realtime sessions. +Fixed an issue where function responses during audio conversations caused the WebSocket connection to close. See [GitHub Issue #9264](https://github.com/firebase/firebase-js-sdk/issues/9264). + - **Breaking Change**: Changed the `functionCallingHandler` property in `StartAudioConversationOptions` so that it now must return a `Promise`. + This breaking change is allowed in a minor release since the Live API is in Public Preview. diff --git a/common/api-review/ai.api.md b/common/api-review/ai.api.md index d4c3c1ad50..1e591b0053 100644 --- a/common/api-review/ai.api.md +++ b/common/api-review/ai.api.md @@ -988,6 +988,7 @@ export class LiveSession { isClosed: boolean; receive(): AsyncGenerator; send(request: string | Array, turnComplete?: boolean): Promise; + sendFunctionResponses(functionResponses: FunctionResponse[]): Promise; sendMediaChunks(mediaChunks: GenerativeContentBlob[]): Promise; sendMediaStream(mediaChunkStream: ReadableStream): Promise; } @@ -1254,7 +1255,7 @@ export function startAudioConversation(liveSession: LiveSession, options?: Start // @beta export interface StartAudioConversationOptions { - functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise; + functionCallingHandler?: (functionCalls: FunctionCall[]) => Promise; } // @public diff --git a/docs-devsite/ai.livesession.md b/docs-devsite/ai.livesession.md index 6ae2cde711..558c5eb3bd 100644 --- a/docs-devsite/ai.livesession.md +++ b/docs-devsite/ai.livesession.md @@ -39,6 +39,7 @@ export declare class LiveSession | [close()](./ai.livesession.md#livesessionclose) | | (Public Preview) Closes this 
session. All methods on this session will throw an error once this resolves. | | [receive()](./ai.livesession.md#livesessionreceive) | | (Public Preview) Yields messages received from the server. This can only be used by one consumer at a time. | | [send(request, turnComplete)](./ai.livesession.md#livesessionsend) | | (Public Preview) Sends content to the server. | +| [sendFunctionResponses(functionResponses)](./ai.livesession.md#livesessionsendfunctionresponses) | | (Public Preview) Sends function responses to the server. | | [sendMediaChunks(mediaChunks)](./ai.livesession.md#livesessionsendmediachunks) | | (Public Preview) Sends realtime input to the server. | | [sendMediaStream(mediaChunkStream)](./ai.livesession.md#livesessionsendmediastream) | | (Public Preview) Sends a stream of [GenerativeContentBlob](./ai.generativecontentblob.md#generativecontentblob_interface). | @@ -134,6 +135,33 @@ Promise<void> If this session has been closed. +## LiveSession.sendFunctionResponses() + +> This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. +> + +Sends function responses to the server. + +Signature: + +```typescript +sendFunctionResponses(functionResponses: FunctionResponse[]): Promise; +``` + +#### Parameters + +| Parameter | Type | Description | +| --- | --- | --- | +| functionResponses | [FunctionResponse](./ai.functionresponse.md#functionresponse_interface)\[\] | The function responses to send. | + +Returns: + +Promise<void> + +#### Exceptions + +If this session has been closed. + ## LiveSession.sendMediaChunks() > This API is provided as a preview for developers and may change based on feedback that we receive. Do not use this API in a production environment. 
diff --git a/docs-devsite/ai.startaudioconversationoptions.md b/docs-devsite/ai.startaudioconversationoptions.md index 08e91d2c7b..827cc0b129 100644 --- a/docs-devsite/ai.startaudioconversationoptions.md +++ b/docs-devsite/ai.startaudioconversationoptions.md @@ -25,7 +25,7 @@ export interface StartAudioConversationOptions | Property | Type | Description | | --- | --- | --- | -| [functionCallingHandler](./ai.startaudioconversationoptions.md#startaudioconversationoptionsfunctioncallinghandler) | (functionCalls: [LiveServerToolCall](./ai.liveservertoolcall.md#liveservertoolcall_interface)\['functionCalls'\]) => Promise<[Part](./ai.md#part)> | (Public Preview) An async handler that is called when the model requests a function to be executed. The handler should perform the function call and return the result as a Part, which will then be sent back to the model. | +| [functionCallingHandler](./ai.startaudioconversationoptions.md#startaudioconversationoptionsfunctioncallinghandler) | (functionCalls: [FunctionCall](./ai.functioncall.md#functioncall_interface)\[\]) => Promise<[FunctionResponse](./ai.functionresponse.md#functionresponse_interface)> | (Public Preview) An async handler that is called when the model requests a function to be executed. The handler should perform the function call and return the result as a FunctionResponse, which will then be sent back to the model. 
| ## StartAudioConversationOptions.functionCallingHandler @@ -37,5 +37,5 @@ An async handler that is called when the model requests a function to be execute Signature: ```typescript -functionCallingHandler?: (functionCalls: LiveServerToolCall['functionCalls']) => Promise; +functionCallingHandler?: (functionCalls: FunctionCall[]) => Promise; ``` diff --git a/packages/ai/src/methods/live-session-helpers.test.ts b/packages/ai/src/methods/live-session-helpers.test.ts index d7d1e2aabb..cad0475b35 100644 --- a/packages/ai/src/methods/live-session-helpers.test.ts +++ b/packages/ai/src/methods/live-session-helpers.test.ts @@ -21,7 +21,11 @@ import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; import { AIError } from '../errors'; import { startAudioConversation } from './live-session-helpers'; -import { LiveServerContent, LiveServerToolCall, Part } from '../types'; +import { + FunctionResponse, + LiveServerContent, + LiveServerToolCall +} from '../types'; import { logger } from '../logger'; import { isNode } from '@firebase/util'; @@ -62,6 +66,7 @@ class MockLiveSession { inConversation = false; send = sinon.stub(); sendMediaChunks = sinon.stub(); + sendFunctionResponses = sinon.stub(); messageGenerator = new MockMessageGenerator(); receive = (): MockMessageGenerator => this.messageGenerator; } @@ -249,16 +254,21 @@ describe('Audio Conversation Helpers', () => { }); it('should call function handler and send result on toolCall message.', async () => { - const handlerStub = sinon.stub().resolves({ - functionResponse: { name: 'get_weather', response: { temp: '72F' } } - } as Part); + const functionResponse: FunctionResponse = { + id: '1', + name: 'get_weather', + response: { temp: '72F' } + }; + const handlerStub = sinon.stub().resolves(functionResponse); const controller = await startAudioConversation(liveSession as any, { functionCallingHandler: handlerStub }); const toolCallMessage: LiveServerToolCall = { type: 'toolCall', - functionCalls: [{ 
name: 'get_weather', args: { location: 'LA' } }] + functionCalls: [ + { id: '1', name: 'get_weather', args: { location: 'LA' } } + ] }; liveSession.messageGenerator.simulateMessage(toolCallMessage); @@ -267,8 +277,8 @@ describe('Audio Conversation Helpers', () => { expect(handlerStub).to.have.been.calledOnceWith( toolCallMessage.functionCalls ); - expect(liveSession.send).to.have.been.calledOnceWith([ - { functionResponse: { name: 'get_weather', response: { temp: '72F' } } } + expect(liveSession.sendFunctionResponses).to.have.been.calledOnceWith([ + functionResponse ]); await controller.stop(); }); diff --git a/packages/ai/src/methods/live-session-helpers.ts b/packages/ai/src/methods/live-session-helpers.ts index e52715de36..b3907d6219 100644 --- a/packages/ai/src/methods/live-session-helpers.ts +++ b/packages/ai/src/methods/live-session-helpers.ts @@ -19,10 +19,10 @@ import { AIError } from '../errors'; import { logger } from '../logger'; import { AIErrorCode, + FunctionCall, + FunctionResponse, GenerativeContentBlob, - LiveServerContent, - LiveServerToolCall, - Part + LiveServerContent } from '../types'; import { LiveSession } from './live-session'; import { Deferred } from '@firebase/util'; @@ -115,8 +115,8 @@ export interface StartAudioConversationOptions { * which will then be sent back to the model. 
*/ functionCallingHandler?: ( - functionCalls: LiveServerToolCall['functionCalls'] - ) => Promise<Part>; + functionCalls: FunctionCall[] + ) => Promise<FunctionResponse>; } /** @@ -338,11 +338,11 @@ export class AudioConversationRunner { ); } else { try { - const resultPart = await this.options.functionCallingHandler( + const functionResponse = await this.options.functionCallingHandler( message.functionCalls ); if (!this.isStopped) { - void this.liveSession.send([resultPart]); + void this.liveSession.sendFunctionResponses([functionResponse]); } } catch (e) { throw new AIError( diff --git a/packages/ai/src/methods/live-session.test.ts b/packages/ai/src/methods/live-session.test.ts index 7db9daaebe..7454b1208c 100644 --- a/packages/ai/src/methods/live-session.test.ts +++ b/packages/ai/src/methods/live-session.test.ts @@ -20,6 +20,7 @@ import { spy, stub } from 'sinon'; import sinonChai from 'sinon-chai'; import chaiAsPromised from 'chai-as-promised'; import { + FunctionResponse, LiveResponseType, LiveServerContent, LiveServerToolCall, @@ -153,6 +154,35 @@ describe('LiveSession', () => { }); }); + describe('sendFunctionResponses()', () => { + it('should send all function responses', async () => { + const functionResponses: FunctionResponse[] = [ + { + id: 'function-call-1', + name: 'function-name', + response: { + result: 'foo' + } + }, + { + id: 'function-call-2', + name: 'function-name-2', + response: { + result: 'bar' + } + } + ]; + await session.sendFunctionResponses(functionResponses); + expect(mockHandler.send).to.have.been.calledOnce; + const sentData = JSON.parse(mockHandler.send.getCall(0).args[0]); + expect(sentData).to.deep.equal({ + toolResponse: { + functionResponses + } + }); + }); + }); + describe('receive()', () => { it('should correctly parse and transform all server message types', async () => { const receivePromise = (async () => { diff --git a/packages/ai/src/methods/live-session.ts b/packages/ai/src/methods/live-session.ts index 11e5346adc..92d325e2f0 100644 --- 
a/packages/ai/src/methods/live-session.ts +++ b/packages/ai/src/methods/live-session.ts @@ -17,6 +17,7 @@ import { AIErrorCode, + FunctionResponse, GenerativeContentBlob, LiveResponseType, LiveServerContent, @@ -30,7 +31,8 @@ import { WebSocketHandler } from '../websocket'; import { logger } from '../logger'; import { _LiveClientContent, - _LiveClientRealtimeInput + _LiveClientRealtimeInput, + _LiveClientToolResponse } from '../types/live-responses'; /** @@ -119,6 +121,32 @@ export class LiveSession { }); } + /** + * Sends function responses to the server. + * + * @param functionResponses - The function responses to send. + * @throws If this session has been closed. + * + * @beta + */ + async sendFunctionResponses( + functionResponses: FunctionResponse[] + ): Promise<void> { + if (this.isClosed) { + throw new AIError( + AIErrorCode.REQUEST_ERROR, + 'This LiveSession has been closed and cannot be used.' + ); + } + + const message: _LiveClientToolResponse = { + toolResponse: { + functionResponses + } + }; + this.webSocketHandler.send(JSON.stringify(message)); + } + /** * Sends a stream of {@link GenerativeContentBlob}. * diff --git a/packages/ai/src/types/live-responses.ts b/packages/ai/src/types/live-responses.ts index 66170f1a5a..d1870fa109 100644 --- a/packages/ai/src/types/live-responses.ts +++ b/packages/ai/src/types/live-responses.ts @@ -15,7 +15,12 @@ * limitations under the License. */ -import { Content, GenerativeContentBlob, Part } from './content'; +import { + Content, + FunctionResponse, + GenerativeContentBlob, + Part +} from './content'; import { LiveGenerationConfig, Tool, ToolConfig } from './requests'; /** @@ -42,6 +47,17 @@ export interface _LiveClientRealtimeInput { mediaChunks: GenerativeContentBlob[]; }; } + +/** + * Function responses that are sent to the model in real time. 
+ */ +// eslint-disable-next-line @typescript-eslint/naming-convention +export interface _LiveClientToolResponse { + toolResponse: { + functionResponses: FunctionResponse[]; + }; +} + /** * The first message in a Live session, used to configure generation options. *