test(ai): Add gemini-2.5-flash to integration tests #9110

Merged · 2 commits · Jun 20, 2025
105 changes: 67 additions & 38 deletions packages/ai/integration/chat.test.ts
@@ -76,56 +76,85 @@ describe('Chat Session', () => {
         'What is the capital of France?'
       );
       const response1 = result1.response;
-      expect(response1.text().trim().toLowerCase()).to.include('paris');
+      const result2 = await chat.sendMessage('And what about Italy?');
+      const response2 = result2.response;
+      const history = await chat.getHistory();
 
-      let history = await chat.getHistory();
-      expect(history.length).to.equal(2);
+      expect(response1.text().trim().toLowerCase()).to.include('paris');
+      expect(response1.usageMetadata).to.not.be.null;
+      expect(response2.text().trim().toLowerCase()).to.include('rome');
+      expect(response2.usageMetadata).to.not.be.null;
+      expect(history.length).to.equal(4);
       expect(history[0].role).to.equal('user');
       expect(history[0].parts[0].text).to.equal(
         'What is the capital of France?'
       );
       expect(history[1].role).to.equal('model');
       expect(history[1].parts[0].text?.toLowerCase()).to.include('paris');
-
-      expect(response1.usageMetadata).to.not.be.null;
-      // Token counts can vary slightly in chat context
-      expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
-        15, // "What is the capital of France?" + system instruction
-        TOKEN_COUNT_DELTA + 2 // More variance for chat context
-      );
-      expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        8, // "Paris"
-        TOKEN_COUNT_DELTA
-      );
-      expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
-        23, // "What is the capital of France?" + system instruction + "Paris"
-        TOKEN_COUNT_DELTA + 3 // More variance for chat context
-      );
-
-      const result2 = await chat.sendMessage('And what about Italy?');
-      const response2 = result2.response;
-      expect(response2.text().trim().toLowerCase()).to.include('rome');
-
-      history = await chat.getHistory();
-      expect(history.length).to.equal(4);
       expect(history[2].role).to.equal('user');
       expect(history[2].parts[0].text).to.equal('And what about Italy?');
       expect(history[3].role).to.equal('model');
       expect(history[3].parts[0].text?.toLowerCase()).to.include('rome');
 
-      expect(response2.usageMetadata).to.not.be.null;
-      expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
-        28, // History + "And what about Italy?" + system instruction
-        TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
-      );
-      expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        8,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
-        36,
-        TOKEN_COUNT_DELTA
-      );
+      if (model.model.includes('gemini-2.5-flash')) {
+        // Token counts can vary slightly in chat context
+        expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
+          17, // "What is the capital of France?" + system instruction
+          TOKEN_COUNT_DELTA + 2 // More variance for chat context
+        );
+        expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8, // "Paris"
+          TOKEN_COUNT_DELTA
+        );
+        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
+          49, // "What is the capital of France?" + system instruction + "Paris"
+          TOKEN_COUNT_DELTA + 3 // More variance for chat context
+        );
+        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
+          49, // "What is the capital of France?" + system instruction + "Paris"
+          TOKEN_COUNT_DELTA + 3 // More variance for chat context
+        );
+
+        expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
+          32, // History + "And what about Italy?" + system instruction
+          TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
+        );
+        expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
+          68,
+          TOKEN_COUNT_DELTA + 2
+        );
+      } else if (model.model.includes('gemini-2.0-flash')) {
+        expect(response1.usageMetadata).to.not.be.null;
+        // Token counts can vary slightly in chat context
+        expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
+          15, // "What is the capital of France?" + system instruction
+          TOKEN_COUNT_DELTA + 2 // More variance for chat context
+        );
+        expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8, // "Paris"
+          TOKEN_COUNT_DELTA
+        );
+        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
+          23, // "What is the capital of France?" + system instruction + "Paris"
+          TOKEN_COUNT_DELTA + 3 // More variance for chat context
+        );
+        expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
+          28, // History + "And what about Italy?" + system instruction
+          TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
+        );
+        expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          8,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
+          36,
+          TOKEN_COUNT_DELTA
+        );
+      }
     });
   });
 });
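A note on the pattern in this file: the expected token counts are now branched inline on model.model.includes(...). As more models join the list, a table-driven variant keeps the per-model numbers in one place. The sketch below is illustrative only, not part of this PR; ExpectedUsage, expectedChatUsage, and expectedUsageFor are hypothetical names, and the numbers are copied from the assertions above.

// Hypothetical refactor sketch (not in this PR): per-model expectations as data.
interface ExpectedUsage {
  promptTokens: number;
  candidatesTokens: number;
  totalTokens: number;
}

const expectedChatUsage: Record<string, ExpectedUsage> = {
  'gemini-2.5-flash': { promptTokens: 17, candidatesTokens: 8, totalTokens: 49 },
  'gemini-2.0-flash': { promptTokens: 15, candidatesTokens: 8, totalTokens: 23 }
};

// Match by substring, mirroring the model.model.includes(...) checks above.
function expectedUsageFor(modelName: string): ExpectedUsage | undefined {
  const key = Object.keys(expectedChatUsage).find(k => modelName.includes(k));
  return key ? expectedChatUsage[key] : undefined;
}

Each branch then collapses to one lookup followed by the same closeTo assertions, and supporting the next model becomes a single table entry.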
2 changes: 1 addition & 1 deletion packages/ai/integration/constants.ts
@@ -52,7 +52,7 @@ const backendNames: Map<BackendType, string> = new Map([
   [BackendType.VERTEX_AI, 'Vertex AI']
 ]);
 
-const modelNames: readonly string[] = ['gemini-2.0-flash'];
+const modelNames: readonly string[] = ['gemini-2.0-flash', 'gemini-2.5-flash'];
 
 /**
  * Array of test configurations that is iterated over to get full coverage
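For context on how this one-line change fans out: each entry in modelNames is paired with each backend to build the test configurations mentioned in the comment above, so every integration suite runs once per backend and model combination, and adding gemini-2.5-flash doubles the matrix. The construction below is an assumed sketch; the real testConfigs code lives later in constants.ts and is not shown in this diff, and names other than modelNames are hypothetical.

// Assumed shape, for illustration only.
interface TestConfig {
  backendName: string;
  model: string;
  toString: () => string;
}

const backendList: readonly string[] = ['Google AI', 'Vertex AI'];

const testConfigs: readonly TestConfig[] = backendList.flatMap(backendName =>
  modelNames.map(model => ({
    backendName,
    model,
    // Surfaces in mocha test titles so a failure identifies the exact pair.
    toString: () => `${backendName} ${model}`
  }))
);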
91 changes: 61 additions & 30 deletions packages/ai/integration/generate-content.test.ts
@@ -81,36 +81,67 @@ describe('Generate Content', () => {
       expect(trimmedText).to.equal('Mountain View');
 
       expect(response.usageMetadata).to.not.be.null;
-      expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
-        21,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-        4,
-        TOKEN_COUNT_DELTA
-      );
-      expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
-        25,
-        TOKEN_COUNT_DELTA * 2
-      );
-      expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
-      expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(1);
-      expect(
-        response.usageMetadata!.promptTokensDetails![0].modality
-      ).to.equal(Modality.TEXT);
-      expect(
-        response.usageMetadata!.promptTokensDetails![0].tokenCount
-      ).to.equal(21);
-      expect(response.usageMetadata!.candidatesTokensDetails).to.not.be.null;
-      expect(
-        response.usageMetadata!.candidatesTokensDetails!.length
-      ).to.equal(1);
-      expect(
-        response.usageMetadata!.candidatesTokensDetails![0].modality
-      ).to.equal(Modality.TEXT);
-      expect(
-        response.usageMetadata!.candidatesTokensDetails![0].tokenCount
-      ).to.be.closeTo(4, TOKEN_COUNT_DELTA);
+
+      if (model.model.includes('gemini-2.5-flash')) {
+        expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
+          22,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          2,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
+          55,
+          TOKEN_COUNT_DELTA * 2
+        );
+        expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
+        expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(
+          1
+        );
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].tokenCount
+        ).to.closeTo(22, TOKEN_COUNT_DELTA);
+
+        // candidatesTokenDetails comes back about half the time, so let's just not test it.
+      } else if (model.model.includes('gemini-2.0-flash')) {
+        expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
+          21,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
+          4,
+          TOKEN_COUNT_DELTA
+        );
+        expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
+          25,
+          TOKEN_COUNT_DELTA * 2
+        );
+        expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
+        expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(
+          1
+        );
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.promptTokensDetails![0].tokenCount
+        ).to.equal(21);
+        expect(response.usageMetadata!.candidatesTokensDetails).to.not.be
+          .null;
+        expect(
+          response.usageMetadata!.candidatesTokensDetails!.length
+        ).to.equal(1);
+        expect(
+          response.usageMetadata!.candidatesTokensDetails![0].modality
+        ).to.equal(Modality.TEXT);
+        expect(
+          response.usageMetadata!.candidatesTokensDetails![0].tokenCount
+        ).to.be.closeTo(4, TOKEN_COUNT_DELTA);
+      }
     });
 
     it('generateContentStream: text input, text output', async () => {
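One reading of the gemini-2.5-flash numbers above (an inference, not something the PR states): promptTokenCount (22) plus candidatesTokenCount (2) lands far below totalTokenCount (55) because 2.5 Flash is a thinking model, and its internal reasoning tokens count toward the total. If the SDK's UsageMetadata exposes an optional thoughtsTokenCount, as the backend's usage metadata does (an assumption here), the totals could be reconciled explicitly:

// Assumption: UsageMetadata carries an optional thoughtsTokenCount for thinking models.
const usage = response.usageMetadata!;
const thoughtsTokens = usage.thoughtsTokenCount ?? 0;

// Prompt + candidates + thoughts should roughly reconstruct the total;
// for non-thinking models thoughtsTokens is simply 0.
expect(
  usage.promptTokenCount + (usage.candidatesTokenCount ?? 0) + thoughtsTokens
).to.be.closeTo(usage.totalTokenCount, TOKEN_COUNT_DELTA * 2);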