salesforcecli · cristiand391 · Sep 26, 2025 · Sep 26, 2025 · Sep 26, 2025 · Sep 26, 2025
diff --git a/packages/mcp-provider-dx-core/src/tools/run_soql_query.ts b/packages/mcp-provider-dx-core/src/tools/run_soql_query.ts
@@ -37,7 +37,7 @@ export const queryOrgParamsSchema = z.object({
   query: z.string().describe('SOQL query to run'),
   usernameOrAlias: usernameOrAliasParam,
   directory: directoryParam,
-  useToolingApi: useToolingApiParam,
+  useToolingApi: useToolingApiParam.describe('Use the Tooling API. Always set to true when querying a tooling sobject.'),
 });
 
 type InputArgs = z.infer<typeof queryOrgParamsSchema>;

diff --git a/packages/mcp/package.json b/packages/mcp/package.json
@@ -56,7 +56,6 @@
   },
   "devDependencies": {
     "@ai-sdk/google": "^1.2.22",
-    "@ai-sdk/openai": "^1.3.23",
     "@salesforce/cli-plugins-testkit": "^5.3.39",
     "@salesforce/dev-config": "^4.3.2",
     "@salesforce/prettier-config": "^0.0.3",

diff --git a/packages/mcp/test/evals/discoverability/deploy_metadata.eval.ts b/packages/mcp/test/evals/discoverability/deploy_metadata.eval.ts
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2025, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { describeEval } from 'vitest-evals';
+import { NoOpTaskRunner } from '../utils/runners.js';
+import { ToolPredictionScorer } from '../utils/scorers/toolPredictionScorer.js';
+
+describeEval('deploy_metadata', { // tool-prediction cases for deploy prompts; label matches the tool under test
+  data: async () => [
+    {
+      input: 'Deploy this file to my default org and run all apex tests in deployment',
+      expectedTools: [
+        {
+          name: 'get_username', // org is only described as "default", so it is resolved first
+          arguments: {
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+            defaultTargetOrg: true,
+          },
+        },
+        {
+          name: 'deploy_metadata',
+          arguments: {
+            sourceDir: [process.env.SF_EVAL_PROMPT_OPEN_FILEPATH], // "this file" -> the currently open file
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+            apexTestLevel: 'RunAllTestsInOrg',
+            usernameOrAlias: 'ebikes-default-org',
+          },
+        },
+      ],
+    },
+    {
+      input: 'Deploy this project to my ebikes org',
+      expectedTools: [
+        {
+          name: 'deploy_metadata', // org alias is given in the prompt, so no `get_username` call is expected
+          arguments: {
+            usernameOrAlias: 'ebikes',
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+          },
+        },
+      ],
+    },
+    {
+      input: 'Deploy this file and run the GeocodingServiceTest tests',
+      expectedTools: [
+        {
+          // user doesn't specify which org to deploy to -> discover it via `get_username`
+          name: 'get_username',
+          arguments: {
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+            defaultTargetOrg: true,
+          },
+        },
+        {
+          name: 'deploy_metadata',
+          arguments: {
+            usernameOrAlias: 'default-org',
+            sourceDir: [process.env.SF_EVAL_PROMPT_OPEN_FILEPATH],
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+            // IMPORTANT: there's a `run_apex_test` available but for these "run test during deployment" scenarios we want to ensure they are only run via `deploy_metadata`, it's a pretty common operation for an agentic loop (test failures rollback deployment)
+            apexTests: ['GeocodingServiceTest'],
+          },
+        },
+      ],
+    },
+  ],
+  task: NoOpTaskRunner(),
+  scorers: [ToolPredictionScorer()],
+  threshold: 1.0,
+  timeout: 30_000,
+});
diff --git a/packages/mcp/test/evals/discoverability/run_apex_test.eval.ts b/packages/mcp/test/evals/discoverability/run_apex_test.eval.ts
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2025, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { describeEval } from 'vitest-evals';
+import { NoOpTaskRunner } from '../utils/runners.js';
+import { ToolPredictionScorer } from '../utils/scorers/toolPredictionScorer.js';
+
+describeEval('run_apex_test', { // tool-prediction cases for standalone Apex test runs; was an empty label
+  data: async () => [
+    {
+      input: 'Run the GeocodingServiceTest and FileUtilitiesTest tests in the dreamhouse org',
+      expectedTools: [
+        {
+          name: 'run_apex_test', // org alias is given in the prompt, so no `get_username` call is expected
+          arguments: {
+            usernameOrAlias: 'dreamhouse',
+            classNames: ['GeocodingServiceTest', 'FileUtilitiesTest'],
+            testLevel: 'RunSpecifiedTests',
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+          },
+        },
+      ],
+    },
+    {
+      input: 'Run all apex tests in the org',
+      expectedTools: [
+        {
+          name: 'get_username', // prompt names no org -> resolve the default target org first
+          arguments: {
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+            defaultTargetOrg: true,
+          },
+        },
+        {
+          name: 'run_apex_test',
+          arguments: {
+            usernameOrAlias: 'default-org',
+            testLevel: 'RunAllTestsInOrg',
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+          },
+        },
+      ],
+    },
+  ],
+  task: NoOpTaskRunner(),
+  scorers: [ToolPredictionScorer()],
+  threshold: 1.0,
+  timeout: 30_000,
+});
diff --git a/packages/mcp/test/evals/discoverability/run_soql_query.eval.ts b/packages/mcp/test/evals/discoverability/run_soql_query.eval.ts
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2025, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import { describeEval } from 'vitest-evals';
+import { NoOpTaskRunner } from '../utils/runners.js';
+import { ToolPredictionScorer } from '../utils/scorers/toolPredictionScorer.js';
+
+describeEval('run_soql_query', { // tool-prediction cases for SOQL queries, including a Tooling API query
+  data: async () => [
+    {
+      input: 'List the name of the Property__c records in my org, ordered in ascending order by their name.',
+      expectedTools: [
+        {
+          name: 'run_soql_query',
+          arguments: {
+            query: 'SELECT Name FROM Property__c ORDER BY Name ASC',
+            usernameOrAlias: 'ebikes',
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+          },
+        },
+      ],
+    },
+    {
+      input: 'Get the coverage of the GeocodingService apex class, you can query the ApexCodeCoverage tooling object',
+      expectedTools: [
+        {
+          name: 'run_soql_query',
+          arguments: {
+            usernameOrAlias: 'ebikes',
+            query: "SELECT Coverage FROM ApexCodeCoverage WHERE ApexClassOrTriggerId = '01pD000000066GR'", // straight quotes: SOQL string literals are not delimited by typographic quotes
+            useToolingApi: true, // ApexCodeCoverage is a tooling object, so the Tooling API flag is expected
+            directory: process.env.SF_EVAL_PROMPT_PROJECT_DIR,
+          },
+        },
+      ],
+    },
+  ],
+  task: NoOpTaskRunner(),
+  scorers: [ToolPredictionScorer()],
+  threshold: 1.0,
+  timeout: 30_000,
+});
diff --git a/packages/mcp/test/evals/e2e/deploy_metadata.eval.ts b/packages/mcp/test/evals/e2e/deploy_metadata.eval.ts
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2025, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import path from 'node:path';
+import { afterAll, beforeAll } from 'vitest';
+import { TestSession } from '@salesforce/cli-plugins-testkit';
+import { describeEval, ToolCallScorer } from 'vitest-evals';
+import { TaskRunner } from '../utils/runners.js';
+import { Factuality } from '../utils/scorers/factuality.js';
+
+let testSession: TestSession; // shared across the suite; assigned in beforeAll
+let orgUsername: string; // username used as `usernameOrAlias` in the expected tool calls
+let projectDir: string; // root of the cloned project
+let currentOpenFile: string; // file presented to the model as the "currently open" one
+
+beforeAll(async () => {
+  testSession = await TestSession.create({
+    project: { gitClone: 'https://github.com/trailheadapps/dreamhouse-lwc' },
+    scratchOrgs: [{ setDefault: true, config: path.join('config', 'project-scratch-def.json') }],
+    devhubAuthStrategy: 'AUTO',
+  });
+
+  projectDir = testSession.project.dir;
+  currentOpenFile = path.join(projectDir, 'force-app', 'main', 'default', 'classes', 'GeocodingServiceTest.cls');
+
+  // first key of `testSession.orgs`; assumed to be the default scratch org's username — TODO confirm against testkit
+  orgUsername = [...testSession.orgs.keys()][0];
+}, 600_000);
+
+afterAll(async () => {
+  await testSession?.clean(); // unset if beforeAll threw; skip cleanup rather than mask that failure with a TypeError
+});
+
+describeEval('deploy_metadata', { // end-to-end deploy cases; expected arguments use the values set in beforeAll
+  data: async () => [
+    {
+      input:
+        'Deploy this project and run all Apex tests, then assign the dreamhouse permset and summarize the apex test results.',
+      expected: 'It should have successfully deployed the project and executed all 11 tests without failures',
+      expectedTools: (() => {
+        return [
+          {
+            name: 'get_username',
+            arguments: {
+              defaultTargetOrg: true,
+              defaultDevHub: false,
+              directory: projectDir,
+            },
+          },
+          {
+            name: 'deploy_metadata',
+            arguments: {
+              apexTestLevel: 'RunAllTestsInOrg',
+              usernameOrAlias: orgUsername,
+              directory: projectDir,
+            },
+          },
+          {
+            name: 'assign_permission_set',
+            arguments: {
+              permissionSetName: 'dreamhouse',
+              usernameOrAlias: orgUsername,
+              directory: projectDir,
+            },
+          },
+        ];
+      })(), // the IIFE must return the array; without `return` this property was `undefined`
+    },
+    {
+      input: 'Deploy this file and run the GeocodingServiceTest tests, then summarize the apex test results.',
+      expected:
+        'It should have deployed 1 component (GeocodingServiceTest class) and successfully executed the "GeocodingServiceTest.successResponse", "GeocodingServiceTest.blankAddress" and "GeocodingServiceTest.errorResponse" tests.',
+      expectedTools: (() => {
+        return [
+          {
+            name: 'get_username',
+            arguments: {
+              defaultTargetOrg: true,
+              defaultDevHub: false,
+              directory: projectDir,
+            },
+          },
+          {
+            name: 'deploy_metadata',
+            arguments: {
+              // the prompt asks for GeocodingServiceTest only, so no org-wide `apexTestLevel` is expected here
+              apexTests: ['GeocodingServiceTest'],
+              sourceDir: [currentOpenFile],
+              usernameOrAlias: orgUsername,
+              directory: projectDir,
+            },
+          },
+        ];
+      })(),
+    },
+  ],
+  task: (input: string) =>
+    TaskRunner({
+      promptOptions: {
+        currentOpenFile,
+        currentOpenWorkspace: projectDir,
+      },
+    })(input), // runner is built per call, so it reads the values assigned in beforeAll
+  scorers: [
+    Factuality(),
+    ToolCallScorer({
+      ordered: true,
+      params: 'strict',
+    }),
+  ],
+  threshold: 0.8,
+  timeout: 600_000,
+});
diff --git a/packages/mcp/test/evals/e2e/describe_code_analyzer_rule.eval.ts b/packages/mcp/test/evals/e2e/describe_code_analyzer_rule.eval.ts
@@ -0,0 +1,32 @@
+import { describeEval } from 'vitest-evals';
+import { TaskRunner } from '../utils/runners.js';
+
+describeEval('describe_code_analyzer_rule', {
+  data: async () => [
+    {
+      input:
+        'tell me the tags that are associated with the Code Analysis Rule named VFUnescapeEl, which is a rule for the pmd engine',
+      expected: ['Recommended', 'Security', 'Visualforce'], // strings the scorer searches for in the model output
+    },
+  ],
+  task: TaskRunner(),
+  scorers: [outputIncludesExpectationArray], // declared further down; function declarations are hoisted
+  threshold: 0.9,
+  timeout: 60_000,
+});
+
+export function outputIncludesExpectationArray(opts: { input: string; output: string; expected: string[] }) {
+  // Scorer: returns `{ score }`, the fraction (0 to 1) of `expected` strings that occur in `output`.
+  // Matching is a case-insensitive substring check; `input` is accepted but not used.
+  const haystack = opts.output.toLowerCase(); // lower-case the output once, not once per expectation
+  let matched = 0;
+  for (const expected of opts.expected) {
+    if (haystack.includes(expected.toLowerCase())) {
+      matched += 1;
+    }
+  }
+  // Divide once at the end: adding 1/n per hit drifts (ten hits of 1/10 sum to 0.9999999999999999).
+  // An empty expectation list still scores 0 rather than NaN from 0/0.
+  const score = opts.expected.length === 0 ? 0 : matched / opts.expected.length;
+  return { score };
+}