Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/mcp-provider-dx-core/src/tools/run_soql_query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export const queryOrgParamsSchema = z.object({
query: z.string().describe('SOQL query to run'),
usernameOrAlias: usernameOrAliasParam,
directory: directoryParam,
useToolingApi: useToolingApiParam,
useToolingApi: useToolingApiParam.describe('Use the Tooling API. Always set to true when querying a tooling sobject.'),
});

type InputArgs = z.infer<typeof queryOrgParamsSchema>;
Expand Down
1 change: 0 additions & 1 deletion packages/mcp/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@
},
"devDependencies": {
"@ai-sdk/google": "^1.2.22",
"@ai-sdk/openai": "^1.3.23",
"@salesforce/cli-plugins-testkit": "^5.3.39",
"@salesforce/dev-config": "^4.3.2",
"@salesforce/prettier-config": "^0.0.3",
Expand Down
83 changes: 83 additions & 0 deletions packages/mcp/test/evals/discoverability/deploy_metadata.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright 2025, Salesforce, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { describeEval } from 'vitest-evals';
import { NoOpTaskRunner } from '../utils/runners.js';
import { ToolPredictionScorer } from '../utils/scorers/toolPredictionScorer.js';

// Discoverability eval for deployment prompts: every case pairs a user prompt with the
// tool calls (name + arguments) that are expected for it.
describeEval('deploy', {
  data: async () => {
    // Environment values are read when the data loader runs, not at module load.
    const projectDir = process.env.SF_EVAL_PROMPT_PROJECT_DIR;
    const openFile = process.env.SF_EVAL_PROMPT_OPEN_FILEPATH;

    // Builds a fresh `get_username` expectation each time so cases never share an object.
    const defaultOrgLookup = () => ({
      name: 'get_username',
      arguments: {
        directory: projectDir,
        defaultTargetOrg: true,
      },
    });

    const deployFileRunningAllTests = {
      input: 'Deploy this file to my default org and run all apex tests in deployment',
      expectedTools: [
        defaultOrgLookup(),
        {
          name: 'deploy_metadata',
          arguments: {
            sourceDir: [openFile],
            directory: projectDir,
            apexTestLevel: 'RunAllTestsInOrg',
            usernameOrAlias: 'ebikes-default-org',
          },
        },
      ],
    };

    const deployProjectToNamedOrg = {
      input: 'Deploy this project to my ebikes org',
      expectedTools: [
        {
          name: 'deploy_metadata',
          arguments: {
            usernameOrAlias: 'ebikes',
            directory: projectDir,
          },
        },
      ],
    };

    const deployFileRunningOneTestClass = {
      input: 'Deploy this file and run the GeocodingServiceTest tests',
      expectedTools: [
        // The prompt names no target org, so it has to be discovered via `get_username` first.
        defaultOrgLookup(),
        {
          name: 'deploy_metadata',
          arguments: {
            usernameOrAlias: 'default-org',
            sourceDir: [openFile],
            directory: projectDir,
            // IMPORTANT: a `run_apex_test` tool is also available, but for "run tests during
            // deployment" scenarios the tests must only run through `deploy_metadata`; this is a
            // common operation in an agentic loop (test failures roll back the deployment).
            apexTests: ['GeocodingServiceTest'],
          },
        },
      ],
    };

    return [deployFileRunningAllTests, deployProjectToNamedOrg, deployFileRunningOneTestClass];
  },
  task: NoOpTaskRunner(),
  scorers: [ToolPredictionScorer()],
  threshold: 1.0,
  timeout: 30_000,
});
61 changes: 61 additions & 0 deletions packages/mcp/test/evals/discoverability/run_apex_test.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright 2025, Salesforce, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { describeEval } from 'vitest-evals';
import { NoOpTaskRunner } from '../utils/runners.js';
import { ToolPredictionScorer } from '../utils/scorers/toolPredictionScorer.js';

// Discoverability eval for the `run_apex_test` tool: every case pairs a user prompt with the
// tool calls (name + arguments) that are expected for it.
// The suite is named after the tool under test (it used to be an empty string, which makes the
// suite impossible to identify in reports and differs from the sibling eval suites).
describeEval('run_apex_test', {
  data: async () => [
    {
      // The org and the test classes are spelled out in the prompt, so no org lookup is expected.
      input: 'Run the GeocodingServiceTest and FileUtilitiesTest tests in the dreamhouse org',
      expectedTools: [
        {
          name: 'run_apex_test',
          arguments: {
            usernameOrAlias: 'dreamhouse',
            classNames: ['GeocodingServiceTest', 'FileUtilitiesTest'],
            testLevel: 'RunSpecifiedTests',
            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
          },
        },
      ],
    },
    {
      // No org is named in the prompt, so a `get_username` lookup is expected before the test run.
      input: 'Run all apex tests in the org',
      expectedTools: [
        {
          name: 'get_username',
          arguments: {
            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
            defaultTargetOrg: true,
          },
        },
        {
          name: 'run_apex_test',
          arguments: {
            usernameOrAlias: 'default-org',
            testLevel: 'RunAllTestsInOrg',
            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
          },
        },
      ],
    },
  ],
  task: NoOpTaskRunner(),
  scorers: [ToolPredictionScorer()],
  threshold: 1.0,
  timeout: 30_000,
});
54 changes: 54 additions & 0 deletions packages/mcp/test/evals/discoverability/run_soql_query.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright 2025, Salesforce, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { describeEval } from 'vitest-evals';
import { NoOpTaskRunner } from '../utils/runners.js';
import { ToolPredictionScorer } from '../utils/scorers/toolPredictionScorer.js';

// Discoverability eval for the `run_soql_query` tool: every case pairs a user prompt with the
// tool call (name + arguments) that is expected for it.
describeEval('run_soql_query', {
  data: async () => [
    {
      input: 'List the name of the Property__c records in my org, ordered in ascending order by their name.',
      expectedTools: [
        {
          name: 'run_soql_query',
          arguments: {
            query: 'SELECT Name FROM Property__c ORDER BY Name ASC',
            usernameOrAlias: 'ebikes',
            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
          },
        },
      ],
    },
    {
      input: 'Get the coverage of the GeocodingService apex class, you can query the ApexCodeCoverage tooling object',
      expectedTools: [
        {
          name: 'run_soql_query',
          arguments: {
            usernameOrAlias: 'ebikes',
            // The Id literal is wrapped in straight single quotes; the typographic quotes used
            // previously are not valid SOQL string delimiters.
            query: "SELECT Coverage FROM ApexCodeCoverage WHERE ApexClassOrTriggerId = '01pD000000066GR'",
            // ApexCodeCoverage is a tooling object, so the Tooling API flag is expected.
            useToolingApi: true,
            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
          },
        },
      ],
    },
  ],
  task: NoOpTaskRunner(),
  scorers: [ToolPredictionScorer()],
  threshold: 1.0,
  timeout: 30_000,
});
125 changes: 125 additions & 0 deletions packages/mcp/test/evals/e2e/deploy_metadata.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright 2025, Salesforce, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import path from 'node:path';
import { afterAll, beforeAll } from 'vitest';
import { TestSession } from '@salesforce/cli-plugins-testkit';
import { describeEval, ToolCallScorer } from 'vitest-evals';
import { TaskRunner } from '../utils/runners.js';
import { Factuality } from '../utils/scorers/factuality.js';

// Fixtures shared by the eval cases in this file; all of them are assigned in `beforeAll`.
let testSession: TestSession;
let orgUsername: string;
let projectDir: string;
let currentOpenFile: string;

// Creates the test session (cloned dreamhouse-lwc project plus a default scratch org) and
// derives the fixtures from it. The hook is given a 10 minute timeout.
beforeAll(async () => {
  const scratchDefinition = path.join('config', 'project-scratch-def.json');

  testSession = await TestSession.create({
    project: { gitClone: 'https://github.com/trailheadapps/dreamhouse-lwc' },
    scratchOrgs: [{ setDefault: true, config: scratchDefinition }],
    devhubAuthStrategy: 'AUTO',
  });

  projectDir = testSession.project.dir;

  // The file the eval prompts refer to as "this file".
  const apexClassesDir = path.join(projectDir, 'force-app', 'main', 'default', 'classes');
  currentOpenFile = path.join(apexClassesDir, 'GeocodingServiceTest.cls');

  // Only one scratch org is requested above, so the first key is the default org's username.
  orgUsername = Array.from(testSession.orgs.keys())[0];
}, 600_000);

// Cleans up the test session once all evals have run.
// Optional chaining guards the case where `beforeAll` failed before `testSession` was assigned:
// without it the cleanup itself throws a TypeError that hides the original setup failure.
afterAll(async () => {
  await testSession?.clean();
});

// End-to-end eval for `deploy_metadata`: each case provides a prompt, a description of the
// expected outcome, and the ordered tool calls that are expected for it.
//
// `expectedTools` is now a plain array. It used to be an IIFE whose block body contained the
// array as a bare expression statement with no `return`, so it always evaluated to `undefined`
// and the expected calls were never handed to the scorer.
//
// NOTE(review): the arrays capture `projectDir`, `orgUsername` and `currentOpenFile` at the moment
// `data()` is invoked — confirm `beforeAll` has populated them by then.
describeEval('deploy_metadata', {
  data: async () => [
    {
      input:
        'Deploy this project and run all Apex tests, then assign the dreamhouse permset and summarize the apex test results.',
      expected: 'It should have successfully deployed the project and executed all 11 tests without failures',
      expectedTools: [
        {
          name: 'get_username',
          arguments: {
            defaultTargetOrg: true,
            defaultDevHub: false,
            directory: projectDir,
          },
        },
        {
          name: 'deploy_metadata',
          arguments: {
            apexTestLevel: 'RunAllTestsInOrg',
            usernameOrAlias: orgUsername,
            directory: projectDir,
          },
        },
        {
          name: 'assign_permission_set',
          arguments: {
            permissionSetName: 'dreamhouse',
            usernameOrAlias: orgUsername,
            directory: projectDir,
          },
        },
      ],
    },
    {
      input: 'Deploy this file and run the GeocodingServiceTest tests, then summarize the apex test results.',
      expected:
        'It should have deployed 1 component (GeocodingServiceTest class) and successfully executed the "GeocodingServiceTest.successResponse", "GeocodingServiceTest.blankAddress" and "GeocodingServiceTest.errorResponse" tests.',
      expectedTools: [
        {
          name: 'get_username',
          arguments: {
            defaultTargetOrg: true,
            defaultDevHub: false,
            directory: projectDir,
          },
        },
        {
          name: 'deploy_metadata',
          arguments: {
            // Only the named test class is expected to run. The former
            // `apexTestLevel: 'RunAllTestsInOrg'` contradicted both the prompt and `apexTests`.
            apexTests: ['GeocodingServiceTest'],
            sourceDir: [currentOpenFile],
            usernameOrAlias: orgUsername,
            directory: projectDir,
          },
        },
      ],
    },
  ],
  // Wrapped in a function so the runner is built with the fixture values at call time rather
  // than when this options object is created.
  task: (input: string) =>
    TaskRunner({
      promptOptions: {
        currentOpenFile,
        currentOpenWorkspace: projectDir,
      },
    })(input),
  scorers: [
    Factuality(),
    ToolCallScorer({
      ordered: true,
      params: 'strict',
    }),
  ],
  threshold: 0.8,
  timeout: 600_000,
});
32 changes: 32 additions & 0 deletions packages/mcp/test/evals/e2e/describe_code_analyzer_rule.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { describeEval } from 'vitest-evals';
import { TaskRunner } from '../utils/runners.js';

// Eval for `describe_code_analyzer_rule`: asks for the tags of a PMD rule and scores the answer
// by how many of the expected tags it mentions.
describeEval('describe_code_analyzer_rule', {
  data: async () => {
    const prompt =
      'tell me the tags that are associated with the Code Analysis Rule named VFUnescapeEl, which is a rule for the pmd engine';
    const expectedTags = ['Recommended', 'Security', 'Visualforce'];

    return [{ input: prompt, expected: expectedTags }];
  },
  task: TaskRunner(),
  scorers: [outputIncludesExpectationArray],
  threshold: 0.9,
  timeout: 60_000,
});

/**
 * Scorer that reports the fraction of expected strings found in the output.
 *
 * Matching is a case-insensitive substring check. The score is computed as
 * `matches / expected.length` in a single division, so a full match is exactly `1`
 * (adding `1 / n` once per match can end up at e.g. `0.9999999999999999`).
 *
 * @param opts.input - the prompt that produced the output; not used for scoring.
 * @param opts.output - the text to search.
 * @param opts.expected - the strings that should appear in `output`.
 * @returns an object whose `score` is between 0 and 1; `0` when `expected` is empty.
 */
export function outputIncludesExpectationArray(opts: { input: string; output: string; expected: string[] }): {
  score: number;
} {
  const { output, expected } = opts;

  // Nothing to look for: keep the score at 0 instead of dividing by zero.
  if (expected.length === 0) {
    return { score: 0 };
  }

  // Lower-case the output once rather than once per expected entry.
  const haystack = output.toLowerCase();
  const matched = expected.filter((entry) => haystack.includes(entry.toLowerCase())).length;

  return { score: matched / expected.length };
}
Loading
Loading