chore: more accuracy test tweaks (#675)

nirinchev · web-flow · commit 80cb5be98387 · 2025-10-24T10:30:16.000+02:00
diff --git a/src/tools/atlas/read/getPerformanceAdvisor.ts b/src/tools/atlas/read/getPerformanceAdvisor.ts
@@ -26,7 +26,9 @@ export class GetPerformanceAdvisorTool extends AtlasToolBase {
     protected description = `Get MongoDB Atlas performance advisor recommendations, which includes the operations: suggested indexes, drop index suggestions, schema suggestions, and a sample of the most recent (max ${DEFAULT_SLOW_QUERY_LOGS_LIMIT}) slow query logs`;
     public operationType: OperationType = "read";
     protected argsShape = {
-        projectId: AtlasArgs.projectId().describe("Atlas project ID to get performance advisor recommendations"),
+        projectId: AtlasArgs.projectId().describe(
+            "Atlas project ID to get performance advisor recommendations. The project ID is a hexadecimal identifier of 24 characters. If the user has only specified the name, use the `atlas-list-projects` tool to retrieve the user's projects with their ids."
+        ),
         clusterName: AtlasArgs.clusterName().describe("Atlas cluster name to get performance advisor recommendations"),
         operations: z
             .array(PerformanceAdvisorOperationType)
diff --git a/src/tools/mongodb/metadata/collectionSchema.ts b/src/tools/mongodb/metadata/collectionSchema.ts
@@ -21,7 +21,7 @@ export class CollectionSchemaTool extends MongoDBToolBase {
             .optional()
             .default(ONE_MB)
             .describe(
-                `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`
+                `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded.`
             ),
     };
 
diff --git a/src/tools/mongodb/read/aggregate.ts b/src/tools/mongodb/read/aggregate.ts
@@ -65,7 +65,7 @@ export const AggregateArgs = {
             "An array of aggregation stages to execute. $vectorSearch can only appear as the first stage of the aggregation pipeline or as the first stage of a $unionWith subpipeline. When using $vectorSearch, unless the user explicitly asks for the embeddings, $unset any embedding field to avoid reaching context limits."
         ),
     responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
-The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. \
+The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \
 Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\
 `),
 };
diff --git a/src/tools/mongodb/read/find.ts b/src/tools/mongodb/read/find.ts
@@ -30,7 +30,7 @@ export const FindArgs = {
             "A document, describing the sort order, matching the syntax of the sort argument of cursor.sort(). The keys of the object are the fields to sort on, while the values are the sort directions (1 for ascending, -1 for descending)."
         ),
     responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
-The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. \
+The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \
 Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.\
 `),
 };
diff --git a/tests/accuracy/collectionSchema.test.ts b/tests/accuracy/collectionSchema.test.ts
@@ -1,26 +1,41 @@
 import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
+import { Matcher } from "./sdk/matcher.js";
+
+const listCollectionsOptionalCall = {
+    toolName: "list-collections",
+    parameters: {
+        database: "mflix",
+    },
+    optional: true,
+};
 
 describeAccuracyTests([
     {
         prompt: "Is there a title field in 'mflix.movies' namespace?",
         expectedToolCalls: [
+            listCollectionsOptionalCall,
             {
                 toolName: "collection-schema",
                 parameters: {
                     database: "mflix",
                     collection: "movies",
+                    sampleSize: Matcher.anyOf(Matcher.undefined, Matcher.number()),
+                    responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()),
                 },
             },
         ],
     },
     {
         prompt: "What is the type of value stored in title field in movies collection in mflix database?",
         expectedToolCalls: [
+            listCollectionsOptionalCall,
             {
                 toolName: "collection-schema",
                 parameters: {
                     database: "mflix",
                     collection: "movies",
+                    sampleSize: Matcher.anyOf(Matcher.undefined, Matcher.number()),
+                    responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()),
                 },
             },
         ],
diff --git a/tests/accuracy/explain.test.ts b/tests/accuracy/explain.test.ts
@@ -1,11 +1,6 @@
 import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
 import { Matcher } from "./sdk/matcher.js";
 
-/**
- * None of these tests score a parameter match on any of the models, likely
- * because we are using Zod.union, when we probably should've used
- * Zod.discriminatedUnion
- */
 describeAccuracyTests([
     {
         prompt: `Will fetching documents, where release_year is 2020, from 'mflix.movies' namespace perform a collection scan?`,
@@ -23,7 +18,7 @@ describeAccuracyTests([
                             },
                         },
                     ],
-                    verbosity: Matcher.string(),
+                    verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined),
                 },
             },
         ],
@@ -45,10 +40,11 @@ describeAccuracyTests([
                                         $match: { release_year: 2020 },
                                     },
                                 ],
+                                responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()),
                             },
                         },
                     ],
-                    verbosity: Matcher.string(),
+                    verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined),
                 },
             },
         ],
@@ -69,7 +65,7 @@ describeAccuracyTests([
                             },
                         },
                     ],
-                    verbosity: Matcher.string(),
+                    verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined),
                 },
             },
         ],
diff --git a/tests/accuracy/export.test.ts b/tests/accuracy/export.test.ts
@@ -14,7 +14,9 @@ describeAccuracyTests([
                     exportTarget: [
                         {
                             name: "find",
-                            arguments: {},
+                            arguments: {
+                                filter: Matcher.emptyObjectOrUndefined,
+                            },
                         },
                     ],
                     jsonExportFormat: Matcher.anyValue,
diff --git a/tests/accuracy/find.test.ts b/tests/accuracy/find.test.ts
@@ -1,10 +1,27 @@
+import type { ExpectedToolCall } from "./sdk/accuracyResultStorage/resultStorage.js";
 import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
 import { Matcher } from "./sdk/matcher.js";
 
+const optionalListCalls: (database: string) => ExpectedToolCall[] = (database) => [
+    {
+        toolName: "list-databases",
+        parameters: {},
+        optional: true,
+    },
+    {
+        toolName: "list-collections",
+        parameters: {
+            database,
+        },
+        optional: true,
+    },
+];
+
 describeAccuracyTests([
     {
         prompt: "List all the movies in 'mflix.movies' namespace.",
         expectedToolCalls: [
+            ...optionalListCalls("mflix"),
             {
                 toolName: "find",
                 parameters: {
@@ -18,6 +35,7 @@ describeAccuracyTests([
     {
         prompt: "List all the documents in 'comics.books' namespace.",
         expectedToolCalls: [
+            ...optionalListCalls("comics"),
             {
                 toolName: "find",
                 parameters: {
@@ -31,6 +49,7 @@ describeAccuracyTests([
     {
         prompt: "Find all the movies in 'mflix.movies' namespace with runtime less than 100.",
         expectedToolCalls: [
+            ...optionalListCalls("mflix"),
             {
                 toolName: "find",
                 parameters: {
@@ -46,6 +65,7 @@ describeAccuracyTests([
     {
         prompt: "Find all movies in 'mflix.movies' collection where director is 'Christina Collins'",
         expectedToolCalls: [
+            ...optionalListCalls("mflix"),
             {
                 toolName: "find",
                 parameters: {
@@ -61,6 +81,7 @@ describeAccuracyTests([
     {
         prompt: "Give me all the movie titles available in 'mflix.movies' namespace",
         expectedToolCalls: [
+            ...optionalListCalls("mflix"),
             {
                 toolName: "find",
                 parameters: {
@@ -81,6 +102,7 @@ describeAccuracyTests([
     {
         prompt: "Use 'mflix.movies' namespace to answer who were casted in the movie 'Certain Fish'",
         expectedToolCalls: [
+            ...optionalListCalls("mflix"),
             {
                 toolName: "find",
                 parameters: {
@@ -99,6 +121,7 @@ describeAccuracyTests([
     {
         prompt: "From the mflix.movies namespace, give me first 2 movies of Horror genre sorted ascending by their runtime",
         expectedToolCalls: [
+            ...optionalListCalls("mflix"),
             {
                 toolName: "find",
                 parameters: {
@@ -112,8 +135,9 @@ describeAccuracyTests([
         ],
     },
     {
-        prompt: "I want a COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.",
+        prompt: "I want an exported COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.",
         expectedToolCalls: [
+            ...optionalListCalls("mflix"),
             {
                 toolName: "find",
                 parameters: {
diff --git a/tests/accuracy/getPerformanceAdvisor.test.ts b/tests/accuracy/getPerformanceAdvisor.test.ts
@@ -1,6 +1,7 @@
 import { formatUntrustedData } from "../../src/tools/tool.js";
 import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
 import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
+import { Matcher } from "./sdk/matcher.js";
 
 const projectId = "68f600519f16226591d054c0";
 
@@ -78,7 +79,7 @@ describeAccuracyTests([
     },
     // Test for Drop Index Suggestions operation
     {
-        prompt: "Show me drop index suggestions for the 'mflix' project and 'mflix-cluster' cluster",
+        prompt: "Show me drop index suggestions for the project named 'mflix' and 'mflix-cluster' cluster",
         expectedToolCalls: [
             ...listProjectsAndClustersToolCalls,
             {
@@ -136,6 +137,15 @@ describeAccuracyTests([
                 parameters: {
                     projectId,
                     clusterName: "mflix-cluster",
+                    operations: Matcher.anyOf(
+                        Matcher.undefined,
+                        Matcher.value([
+                            "suggestedIndexes",
+                            "dropIndexSuggestions",
+                            "slowQueryLogs",
+                            "schemaSuggestions",
+                        ])
+                    ),
                 },
             },
         ],
diff --git a/tests/accuracy/logs.test.ts b/tests/accuracy/logs.test.ts
@@ -9,6 +9,7 @@ describeAccuracyTests([
                 toolName: "mongodb-logs",
                 parameters: {
                     type: "startupWarnings",
+                    limit: Matcher.anyOf(Matcher.undefined, Matcher.number()),
                 },
             },
         ],
diff --git a/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts b/tests/integration/tools/mongodb/metadata/collectionSchema.test.ts
@@ -26,7 +26,7 @@ describeWithMongoDB("collectionSchema tool", (integration) => {
         {
             name: "responseBytesLimit",
             type: "number",
-            description: `The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded.`,
+            description: `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded.`,
             required: false,
         },
     ]);
diff --git a/tests/integration/tools/mongodb/read/aggregate.test.ts b/tests/integration/tools/mongodb/read/aggregate.test.ts
@@ -34,8 +34,7 @@ describeWithMongoDB("aggregate tool", (integration) => {
         },
         {
             name: "responseBytesLimit",
-            description:
-                'The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.',
+            description: `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.`,
             type: "number",
             required: false,
         },
diff --git a/tests/integration/tools/mongodb/read/find.test.ts b/tests/integration/tools/mongodb/read/find.test.ts
@@ -56,8 +56,7 @@ describeWithMongoDB("find tool with default configuration", (integration) => {
         },
         {
             name: "responseBytesLimit",
-            description:
-                'The maximum number of bytes to return in the response. This value is capped by the server’s configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.',
+            description: `The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.`,
             type: "number",
             required: false,
         },

Original file line number	Diff line number	Diff line change
`@@ -1,10 +1,27 @@`
	`1`	`+import type { ExpectedToolCall } from "./sdk/accuracyResultStorage/resultStorage.js";`
`1`	`2`	`import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";`
`2`	`3`	`import { Matcher } from "./sdk/matcher.js";`
`3`	`4`
	`5`	`+const optionalListCalls: (database: string) => ExpectedToolCall[] = (database) => [`
	`6`	`+ {`
	`7`	`+ toolName: "list-databases",`
	`8`	`+ parameters: {},`
	`9`	`+ optional: true,`
	`10`	`+ },`
	`11`	`+ {`
	`12`	`+ toolName: "list-collections",`
	`13`	`+ parameters: {`
	`14`	`+ database,`
	`15`	`+ },`
	`16`	`+ optional: true,`
	`17`	`+ },`
	`18`	`+];`
	`19`	`+`
`4`	`20`	`describeAccuracyTests([`
`5`	`21`	`{`
`6`	`22`	`prompt: "List all the movies in 'mflix.movies' namespace.",`
`7`	`23`	`expectedToolCalls: [`
	`24`	`+ ...optionalListCalls("mflix"),`
`8`	`25`	`{`
`9`	`26`	`toolName: "find",`
`10`	`27`	`parameters: {`
`@@ -18,6 +35,7 @@ describeAccuracyTests([`
`18`	`35`	`{`
`19`	`36`	`prompt: "List all the documents in 'comics.books' namespace.",`
`20`	`37`	`expectedToolCalls: [`
	`38`	`+ ...optionalListCalls("comics"),`
`21`	`39`	`{`
`22`	`40`	`toolName: "find",`
`23`	`41`	`parameters: {`
`@@ -31,6 +49,7 @@ describeAccuracyTests([`
`31`	`49`	`{`
`32`	`50`	`prompt: "Find all the movies in 'mflix.movies' namespace with runtime less than 100.",`
`33`	`51`	`expectedToolCalls: [`
	`52`	`+ ...optionalListCalls("mflix"),`
`34`	`53`	`{`
`35`	`54`	`toolName: "find",`
`36`	`55`	`parameters: {`
`@@ -46,6 +65,7 @@ describeAccuracyTests([`
`46`	`65`	`{`
`47`	`66`	`prompt: "Find all movies in 'mflix.movies' collection where director is 'Christina Collins'",`
`48`	`67`	`expectedToolCalls: [`
	`68`	`+ ...optionalListCalls("mflix"),`
`49`	`69`	`{`
`50`	`70`	`toolName: "find",`
`51`	`71`	`parameters: {`
`@@ -61,6 +81,7 @@ describeAccuracyTests([`
`61`	`81`	`{`
`62`	`82`	`prompt: "Give me all the movie titles available in 'mflix.movies' namespace",`
`63`	`83`	`expectedToolCalls: [`
	`84`	`+ ...optionalListCalls("mflix"),`
`64`	`85`	`{`
`65`	`86`	`toolName: "find",`
`66`	`87`	`parameters: {`
`@@ -81,6 +102,7 @@ describeAccuracyTests([`
`81`	`102`	`{`
`82`	`103`	`prompt: "Use 'mflix.movies' namespace to answer who were casted in the movie 'Certain Fish'",`
`83`	`104`	`expectedToolCalls: [`
	`105`	`+ ...optionalListCalls("mflix"),`
`84`	`106`	`{`
`85`	`107`	`toolName: "find",`
`86`	`108`	`parameters: {`
`@@ -99,6 +121,7 @@ describeAccuracyTests([`
`99`	`121`	`{`
`100`	`122`	`prompt: "From the mflix.movies namespace, give me first 2 movies of Horror genre sorted ascending by their runtime",`
`101`	`123`	`expectedToolCalls: [`
	`124`	`+ ...optionalListCalls("mflix"),`
`102`	`125`	`{`
`103`	`126`	`toolName: "find",`
`104`	`127`	`parameters: {`
`@@ -112,8 +135,9 @@ describeAccuracyTests([`
`112`	`135`	`],`
`113`	`136`	`},`
`114`	`137`	`{`
`115`		`- prompt: "I want a COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.",`
	`138`	`+ prompt: "I want an exported COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.",`
`116`	`139`	`expectedToolCalls: [`
	`140`	`+ ...optionalListCalls("mflix"),`
`117`	`141`	`{`
`118`	`142`	`toolName: "find",`
`119`	`143`	`parameters: {`