Skip to content

Commit b973af8

Browse files
committed
chore: Add interface for embeddings providers
We are using Zod to define the types so we can use the schemas later for tool contracts.

- chore: minor renaming
- chore: only destructure what we actually need
- chore: add voyage provider to the package.json
- chore: draft integration of embeddings with the aggregate tool
- chore: fix style issues and typings
- chore: add accuracy test
- chore: add an accuracy test where the index name is provided by the user
- chore: fix metadata
- chore: add some integration tests with voyage AI
- chore: tests for basic quantization in the search itself
- chore: fix yaml
- chore: style check fixes
- chore: fix issue with the embedding transformation
- chore: simplify integration with embeddings and make it more configurable
- chore: fix accuracy tests and add defaults
- Update tests/integration/tools/mongodb/read/aggregate.test.ts (Co-authored-by: Copilot <[email protected]>)
- Update tests/integration/tools/mongodb/read/aggregate.test.ts (Co-authored-by: Copilot <[email protected]>)
- Update src/tools/mongodb/read/aggregate.ts (Co-authored-by: Copilot <[email protected]>)
- chore: improvements on documentation
1 parent 8a5da23 commit b973af8

File tree

10 files changed

+756
-147
lines changed

10 files changed

+756
-147
lines changed

.github/workflows/code-health.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ jobs:
3434
run: npm ci
3535
- name: Run tests
3636
run: npm test
37+
env:
38+
TEST_MDB_MCP_VOYAGE_API_KEY: ${{ secrets.TEST_MDB_MCP_VOYAGE_API_KEY }}
3739
- name: Upload test results
3840
if: always() && matrix.os == 'ubuntu-latest'
3941
uses: actions/upload-artifact@v4

package-lock.json

Lines changed: 22 additions & 135 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@
7575
"@typescript-eslint/parser": "^8.44.0",
7676
"@vitest/coverage-v8": "^3.2.4",
7777
"@vitest/eslint-plugin": "^1.3.4",
78-
"ai": "^5.0.72",
7978
"duplexpair": "^1.0.2",
8079
"eslint": "^9.34.0",
8180
"eslint-config-prettier": "^10.1.8",
@@ -103,6 +102,7 @@
103102
"@mongodb-js/devtools-proxy-support": "^0.5.3",
104103
"@mongosh/arg-parser": "^3.19.0",
105104
"@mongosh/service-provider-node-driver": "^3.17.0",
105+
"ai": "^5.0.72",
106106
"bson": "^6.10.4",
107107
"express": "^5.1.0",
108108
"lru-cache": "^11.1.0",
@@ -115,6 +115,7 @@
115115
"oauth4webapi": "^3.8.0",
116116
"openapi-fetch": "^0.14.0",
117117
"ts-levenshtein": "^1.0.7",
118+
"voyage-ai-provider": "^2.0.0",
118119
"yargs-parser": "21.1.1",
119120
"zod": "^3.25.76"
120121
},

src/common/errors.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ export enum ErrorCodes {
44
ForbiddenCollscan = 1_000_002,
55
ForbiddenWriteOperation = 1_000_003,
66
AtlasSearchNotSupported = 1_000_004,
7+
NoEmbeddingsProviderConfigured = 1_000_005,
8+
AtlasVectorSearchIndexNotFound = 1_000_006,
9+
AtlasVectorSearchInvalidQuery = 1_000_007,
710
}
811

912
export class MongoDBError<ErrorCode extends ErrorCodes = ErrorCodes> extends Error {
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import { createVoyage } from "voyage-ai-provider";
2+
import type { VoyageProvider } from "voyage-ai-provider";
3+
import { embedMany } from "ai";
4+
import type { UserConfig } from "../config.js";
5+
import assert from "assert";
6+
import { createFetch } from "@mongodb-js/devtools-proxy-support";
7+
import { z } from "zod";
8+
9+
/** One piece of text handed to an embeddings provider. */
type EmbeddingsInput = string;

/** The embedding of a single input, represented as an array of numbers. */
type Embeddings = number[];

/** Parameters that every embeddings provider in this module has to accept. */
export type EmbeddingParameters = {
    /** Kind of content being embedded: either `"query"` or `"document"`. */
    inputType: "query" | "document";
};
14+
15+
/**
 * Contract implemented by every embeddings backend.
 *
 * @typeParam TModel - String identifiers of the models the backend accepts.
 * @typeParam TParameters - Parameters the backend accepts; must include the common `EmbeddingParameters`.
 */
interface EmbeddingsProvider<TModel extends string, TParameters extends EmbeddingParameters> {
    /**
     * Computes embeddings for a batch of inputs.
     *
     * @param modelId - Model used to compute the embeddings.
     * @param content - Inputs to embed.
     * @param parameters - Backend-specific embedding parameters.
     * @returns A promise resolving to an array of embeddings (presumably one per input, in input order — confirm
     * against the implementation in use).
     */
    embed(modelId: TModel, content: EmbeddingsInput[], parameters: TParameters): Promise<Embeddings[]>;
}
22+
23+
/** Voyage model identifiers accepted by `zVoyageModels`. */
const voyageModelIds = ["voyage-3-large", "voyage-3.5", "voyage-3.5-lite", "voyage-code-3"] as const;

/** Zod schema for the Voyage model name; an `undefined` input parses to `"voyage-3-large"`. */
export const zVoyageModels = z.enum(voyageModelIds).default("voyage-3-large");
26+
27+
/** Schema for the embedding size: one of the listed literals, `1024` when omitted. */
const zVoyageOutputDimension = z
    .union([z.literal(256), z.literal(512), z.literal(1024), z.literal(2048), z.literal(4096)])
    .optional()
    .default(1024);

/** Schema for the embedding data type: one of the listed names, `"float"` when omitted. */
const zVoyageOutputDType = z.enum(["float", "int8", "uint8", "binary", "ubinary"]).optional().default("float");

/**
 * Zod schema for the Voyage-specific embedding parameters. Both fields may be omitted, in which case parsing
 * fills in the defaults declared above.
 *
 * NOTE(review): `.optional()` directly before `.default(...)` looks redundant for parsing — confirm nothing
 * (e.g. JSON-schema generation for tool contracts) depends on it before removing.
 * NOTE(review): the parsed object is forwarded as `providerOptions.voyage`; confirm that the `outputDType`
 * spelling matches the option name that voyage-ai-provider actually reads.
 */
export const zVoyageEmbeddingParameters = z.object({
    outputDimension: zVoyageOutputDimension,
    outputDType: zVoyageOutputDType,
});
34+
35+
/** Model name as produced by parsing with `zVoyageModels`. */
type VoyageModels = z.infer<typeof zVoyageModels>;

/** The common `EmbeddingParameters` combined with the parsed Voyage-specific parameters. */
type VoyageEmbeddingParameters = EmbeddingParameters & z.infer<typeof zVoyageEmbeddingParameters>;
37+
38+
/**
 * Embeddings provider backed by Voyage AI, built on `voyage-ai-provider` and the `embedMany` helper of the
 * `ai` package.
 */
class VoyageEmbeddingsProvider implements EmbeddingsProvider<VoyageModels, VoyageEmbeddingParameters> {
    /** Voyage client created once in the constructor and reused by every `embed` call. */
    private readonly voyage: VoyageProvider;

    /**
     * @param config - User configuration; only `voyageApiKey` is read from it.
     * @param providedFetch - Optional `fetch` implementation used instead of the proxy-aware default.
     * @throws If `voyageApiKey` is falsy. Callers are expected to check `isConfiguredIn` first.
     */
    constructor({ voyageApiKey }: UserConfig, providedFetch?: typeof fetch) {
        assert(voyageApiKey, "voyageApiKey does not exist. This is likely a bug.");

        // We should always use, by default, any enterprise proxy that the user has configured.
        // Direct requests to VoyageAI might get blocked by the network if they don't go through
        // the provided proxy.
        // NOTE(review): the double assertion presumably bridges a mismatch between the type returned by
        // `createFetch` and the global `fetch` type — confirm and narrow if the types allow it.
        const customFetch: typeof fetch = (providedFetch ??
            createFetch({ useEnvironmentVariableProxies: true })) as unknown as typeof fetch;

        this.voyage = createVoyage({ apiKey: voyageApiKey, fetch: customFetch });
    }

    /**
     * Reports whether the given configuration carries a Voyage API key.
     *
     * @param config - User configuration; only `voyageApiKey` is read from it.
     * @returns `true` when `voyageApiKey` is truthy, `false` otherwise.
     */
    static isConfiguredIn({ voyageApiKey }: UserConfig): boolean {
        return !!voyageApiKey;
    }

    /**
     * Embeds a batch of inputs with the requested Voyage model.
     *
     * @param modelId - Voyage model used to compute the embeddings.
     * @param content - Inputs to embed.
     * @param parameters - Forwarded unchanged to the provider as `providerOptions.voyage`.
     * @returns The `embeddings` array returned by `embedMany`.
     */
    async embed(
        modelId: VoyageModels,
        content: EmbeddingsInput[],
        parameters: VoyageEmbeddingParameters
    ): Promise<Embeddings[]> {
        const model = this.voyage.textEmbeddingModel(modelId);
        const { embeddings } = await embedMany({
            model,
            values: content,
            providerOptions: { voyage: parameters },
        });

        return embeddings;
    }
}
72+
73+
/**
 * Builds the embeddings provider selected by the user configuration.
 *
 * @param userConfig - User configuration to inspect.
 * @returns A Voyage-backed provider when `VoyageEmbeddingsProvider.isConfiguredIn(userConfig)` is true,
 * otherwise `undefined`.
 */
export function getEmbeddingsProvider(
    userConfig: UserConfig
): EmbeddingsProvider<VoyageModels, VoyageEmbeddingParameters> | undefined {
    return VoyageEmbeddingsProvider.isConfiguredIn(userConfig) ? new VoyageEmbeddingsProvider(userConfig) : undefined;
}
82+
83+
/** The Voyage embedding parameters schema extended with a `model` field validated by `zVoyageModels`. */
export const zSupportedEmbeddingParameters = zVoyageEmbeddingParameters.extend({
    model: zVoyageModels,
});

/** Type of the value obtained by parsing with `zSupportedEmbeddingParameters`. */
export type SupportedEmbeddingParameters = z.infer<typeof zSupportedEmbeddingParameters>;

0 commit comments

Comments
 (0)