Skip to content

Commit 80cb5be

Browse files
authored
chore: more accuracy test tweaks (#675)
1 parent 3b9b888 commit 80cb5be

File tree

13 files changed

+68
-20
lines changed

13 files changed

+68
-20
lines changed

src/tools/atlas/read/getPerformanceAdvisor.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,9 @@ export class GetPerformanceAdvisorTool extends AtlasToolBase {
2626
protected description = `Get MongoDB Atlas performance advisor recommendations, which includes the operations: suggested indexes, drop index suggestions, schema suggestions, and a sample of the most recent (max ${DEFAULT_SLOW_QUERY_LOGS_LIMIT}) slow query logs`;
2727
public operationType: OperationType = "read";
2828
protected argsShape = {
29-
projectId: AtlasArgs.projectId().describe("Atlas project ID to get performance advisor recommendations"),
29+
projectId: AtlasArgs.projectId().describe(
30+
"Atlas project ID to get performance advisor recommendations. The project ID is a hexadecimal identifier of 24 characters. If the user has only specified the name, use the `atlas-list-projects` tool to retrieve the user's projects with their ids."
31+
),
3032
clusterName: AtlasArgs.clusterName().describe("Atlas cluster name to get performance advisor recommendations"),
3133
operations: z
3234
.array(PerformanceAdvisorOperationType)

src/tools/mongodb/metadata/collectionSchema.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ export class CollectionSchemaTool extends MongoDBToolBase {
2121
.optional()
2222
.default(ONE_MB)
2323
.describe(
24-
`The maximum number of bytes to return in the response. This value is capped by the servers configured maxBytesPerQuery and cannot be exceeded.`
24+
`The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded.`
2525
),
2626
};
2727

src/tools/mongodb/read/aggregate.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ export const AggregateArgs = {
6565
"An array of aggregation stages to execute. $vectorSearch can only appear as the first stage of the aggregation pipeline or as the first stage of a $unionWith subpipeline. When using $vectorSearch, unless the user explicitly asks for the embeddings, $unset any embedding field to avoid reaching context limits."
6666
),
6767
responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
68-
The maximum number of bytes to return in the response. This value is capped by the servers configured maxBytesPerQuery and cannot be exceeded. \
68+
The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \
6969
Note to LLM: If the entire aggregation result is required, use the "export" tool instead of increasing this limit.\
7070
`),
7171
};

src/tools/mongodb/read/find.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ export const FindArgs = {
3030
"A document, describing the sort order, matching the syntax of the sort argument of cursor.sort(). The keys of the object are the fields to sort on, while the values are the sort directions (1 for ascending, -1 for descending)."
3131
),
3232
responseBytesLimit: z.number().optional().default(ONE_MB).describe(`\
33-
The maximum number of bytes to return in the response. This value is capped by the servers configured maxBytesPerQuery and cannot be exceeded. \
33+
The maximum number of bytes to return in the response. This value is capped by the server's configured maxBytesPerQuery and cannot be exceeded. \
3434
Note to LLM: If the entire query result is required, use the "export" tool instead of increasing this limit.\
3535
`),
3636
};

tests/accuracy/collectionSchema.test.ts

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,41 @@
11
import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
2+
import { Matcher } from "./sdk/matcher.js";
3+
4+
const listCollectionsOptionalCall = {
5+
toolName: "list-collections",
6+
parameters: {
7+
database: "mflix",
8+
},
9+
optional: true,
10+
};
211

312
describeAccuracyTests([
413
{
514
prompt: "Is there a title field in 'mflix.movies' namespace?",
615
expectedToolCalls: [
16+
listCollectionsOptionalCall,
717
{
818
toolName: "collection-schema",
919
parameters: {
1020
database: "mflix",
1121
collection: "movies",
22+
sampleSize: Matcher.anyOf(Matcher.undefined, Matcher.number()),
23+
responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()),
1224
},
1325
},
1426
],
1527
},
1628
{
1729
prompt: "What is the type of value stored in title field in movies collection in mflix database?",
1830
expectedToolCalls: [
31+
listCollectionsOptionalCall,
1932
{
2033
toolName: "collection-schema",
2134
parameters: {
2235
database: "mflix",
2336
collection: "movies",
37+
sampleSize: Matcher.anyOf(Matcher.undefined, Matcher.number()),
38+
responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()),
2439
},
2540
},
2641
],

tests/accuracy/explain.test.ts

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,6 @@
11
import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
22
import { Matcher } from "./sdk/matcher.js";
33

4-
/**
5-
* None of these tests score a parameter match on any of the models, likely
6-
* because we are using Zod.union, when we probably should've used
7-
* Zod.discriminatedUnion
8-
*/
94
describeAccuracyTests([
105
{
116
prompt: `Will fetching documents, where release_year is 2020, from 'mflix.movies' namespace perform a collection scan?`,
@@ -23,7 +18,7 @@ describeAccuracyTests([
2318
},
2419
},
2520
],
26-
verbosity: Matcher.string(),
21+
verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined),
2722
},
2823
},
2924
],
@@ -45,10 +40,11 @@ describeAccuracyTests([
4540
$match: { release_year: 2020 },
4641
},
4742
],
43+
responseBytesLimit: Matcher.anyOf(Matcher.undefined, Matcher.number()),
4844
},
4945
},
5046
],
51-
verbosity: Matcher.string(),
47+
verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined),
5248
},
5349
},
5450
],
@@ -69,7 +65,7 @@ describeAccuracyTests([
6965
},
7066
},
7167
],
72-
verbosity: Matcher.string(),
68+
verbosity: Matcher.anyOf(Matcher.string(), Matcher.undefined),
7369
},
7470
},
7571
],

tests/accuracy/export.test.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,9 @@ describeAccuracyTests([
1414
exportTarget: [
1515
{
1616
name: "find",
17-
arguments: {},
17+
arguments: {
18+
filter: Matcher.emptyObjectOrUndefined,
19+
},
1820
},
1921
],
2022
jsonExportFormat: Matcher.anyValue,

tests/accuracy/find.test.ts

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,27 @@
1+
import type { ExpectedToolCall } from "./sdk/accuracyResultStorage/resultStorage.js";
12
import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
23
import { Matcher } from "./sdk/matcher.js";
34

5+
const optionalListCalls: (database: string) => ExpectedToolCall[] = (database) => [
6+
{
7+
toolName: "list-databases",
8+
parameters: {},
9+
optional: true,
10+
},
11+
{
12+
toolName: "list-collections",
13+
parameters: {
14+
database,
15+
},
16+
optional: true,
17+
},
18+
];
19+
420
describeAccuracyTests([
521
{
622
prompt: "List all the movies in 'mflix.movies' namespace.",
723
expectedToolCalls: [
24+
...optionalListCalls("mflix"),
825
{
926
toolName: "find",
1027
parameters: {
@@ -18,6 +35,7 @@ describeAccuracyTests([
1835
{
1936
prompt: "List all the documents in 'comics.books' namespace.",
2037
expectedToolCalls: [
38+
...optionalListCalls("comics"),
2139
{
2240
toolName: "find",
2341
parameters: {
@@ -31,6 +49,7 @@ describeAccuracyTests([
3149
{
3250
prompt: "Find all the movies in 'mflix.movies' namespace with runtime less than 100.",
3351
expectedToolCalls: [
52+
...optionalListCalls("mflix"),
3453
{
3554
toolName: "find",
3655
parameters: {
@@ -46,6 +65,7 @@ describeAccuracyTests([
4665
{
4766
prompt: "Find all movies in 'mflix.movies' collection where director is 'Christina Collins'",
4867
expectedToolCalls: [
68+
...optionalListCalls("mflix"),
4969
{
5070
toolName: "find",
5171
parameters: {
@@ -61,6 +81,7 @@ describeAccuracyTests([
6181
{
6282
prompt: "Give me all the movie titles available in 'mflix.movies' namespace",
6383
expectedToolCalls: [
84+
...optionalListCalls("mflix"),
6485
{
6586
toolName: "find",
6687
parameters: {
@@ -81,6 +102,7 @@ describeAccuracyTests([
81102
{
82103
prompt: "Use 'mflix.movies' namespace to answer who were casted in the movie 'Certain Fish'",
83104
expectedToolCalls: [
105+
...optionalListCalls("mflix"),
84106
{
85107
toolName: "find",
86108
parameters: {
@@ -99,6 +121,7 @@ describeAccuracyTests([
99121
{
100122
prompt: "From the mflix.movies namespace, give me first 2 movies of Horror genre sorted ascending by their runtime",
101123
expectedToolCalls: [
124+
...optionalListCalls("mflix"),
102125
{
103126
toolName: "find",
104127
parameters: {
@@ -112,8 +135,9 @@ describeAccuracyTests([
112135
],
113136
},
114137
{
115-
prompt: "I want a COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.",
138+
prompt: "I want an exported COMPLETE list of all the movies ONLY from 'mflix.movies' namespace.",
116139
expectedToolCalls: [
140+
...optionalListCalls("mflix"),
117141
{
118142
toolName: "find",
119143
parameters: {

tests/accuracy/getPerformanceAdvisor.test.ts

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import { formatUntrustedData } from "../../src/tools/tool.js";
22
import { describeAccuracyTests } from "./sdk/describeAccuracyTests.js";
33
import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
4+
import { Matcher } from "./sdk/matcher.js";
45

56
const projectId = "68f600519f16226591d054c0";
67

@@ -78,7 +79,7 @@ describeAccuracyTests([
7879
},
7980
// Test for Drop Index Suggestions operation
8081
{
81-
prompt: "Show me drop index suggestions for the 'mflix' project and 'mflix-cluster' cluster",
82+
prompt: "Show me drop index suggestions for the project named 'mflix' and 'mflix-cluster' cluster",
8283
expectedToolCalls: [
8384
...listProjectsAndClustersToolCalls,
8485
{
@@ -136,6 +137,15 @@ describeAccuracyTests([
136137
parameters: {
137138
projectId,
138139
clusterName: "mflix-cluster",
140+
operations: Matcher.anyOf(
141+
Matcher.undefined,
142+
Matcher.value([
143+
"suggestedIndexes",
144+
"dropIndexSuggestions",
145+
"slowQueryLogs",
146+
"schemaSuggestions",
147+
])
148+
),
139149
},
140150
},
141151
],

tests/accuracy/logs.test.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ describeAccuracyTests([
99
toolName: "mongodb-logs",
1010
parameters: {
1111
type: "startupWarnings",
12+
limit: Matcher.anyOf(Matcher.undefined, Matcher.number()),
1213
},
1314
},
1415
],

0 commit comments

Comments
 (0)