From a9cf631a97194d0c2d6d46c281d874b1c5665fc8 Mon Sep 17 00:00:00 2001 From: Garvit Gupta Date: Thu, 17 Oct 2024 15:13:20 +0530 Subject: [PATCH] VS-284: Add Vectorize queryById operation for Bindings --- .../test/vectorize/vectorize-api-test.js | 39 +++++++++ .../internal/test/vectorize/vectorize-mock.js | 2 +- src/cloudflare/internal/vectorize-api.ts | 85 ++++++++++++------- src/cloudflare/internal/vectorize.d.ts | 10 +++ types/defines/vectorize.d.ts | 10 +++ 5 files changed, 116 insertions(+), 30 deletions(-) diff --git a/src/cloudflare/internal/test/vectorize/vectorize-api-test.js b/src/cloudflare/internal/test/vectorize/vectorize-api-test.js index 01babcf50b6..3081d165d43 100644 --- a/src/cloudflare/internal/test/vectorize/vectorize-api-test.js +++ b/src/cloudflare/internal/test/vectorize/vectorize-api-test.js @@ -57,6 +57,45 @@ export const test_vector_search_vector_query = { assert.deepStrictEqual(results, expected); } + { + // with returnValues = true, returnMetadata = "indexed" + const results = await IDX.queryById('some-vector-id', { + topK: 3, + returnValues: true, + returnMetadata: 'indexed', + }); + assert.equal(true, results.count > 0); + /** @type {VectorizeMatches} */ + const expected = { + matches: [ + { + id: 'b0daca4a-ffd8-4865-926b-e24800af2a2d', + values: [0.2331, 1.0125, 0.6131, 0.9421, 0.9661, 0.8121], + metadata: { text: 'She sells seashells by the seashore' }, + score: 0.71151, + }, + { + id: 'a44706aa-a366-48bc-8cc1-3feffd87d548', + values: [0.2321, 0.8121, 0.6315, 0.6151, 0.4121, 0.1512], + metadata: { + text: 'Peter Piper picked a peck of pickled peppers', + }, + score: 0.68913, + }, + { + id: '43cfcb31-07e2-411f-8bf9-f82a95ba8b96', + values: [0.0515, 0.7512, 0.8612, 0.2153, 0.15121, 0.6812], + metadata: { + text: 'You know New York, you need New York, you know you need unique New York', + }, + score: 0.94812, + }, + ], + count: 3, + }; + assert.deepStrictEqual(results, expected); + } + { // with returnValues = unset (false), 
returnMetadata = false ("none") const results = await IDX.query(new Float32Array(new Array(5).fill(0)), { diff --git a/src/cloudflare/internal/test/vectorize/vectorize-mock.js b/src/cloudflare/internal/test/vectorize/vectorize-mock.js index a8e033020b8..cc02aba429a 100644 --- a/src/cloudflare/internal/test/vectorize/vectorize-mock.js +++ b/src/cloudflare/internal/test/vectorize/vectorize-mock.js @@ -97,7 +97,7 @@ export default { ) { return Response.json({}); } else if (request.method === 'POST' && pathname.endsWith('/query')) { - /** @type {VectorizeQueryOptions & {vector: number[]}} */ + /** @type {VectorizeQueryOptions & ({vector: number[]} | {vectorId: string})} */ const body = await request.json(); let returnSet = structuredClone(exampleVectorMatches); if ( diff --git a/src/cloudflare/internal/vectorize-api.ts b/src/cloudflare/internal/vectorize-api.ts index 9912a90d27c..b245eae443e 100644 --- a/src/cloudflare/internal/vectorize-api.ts +++ b/src/cloudflare/internal/vectorize-api.ts @@ -18,6 +18,10 @@ enum Operation { type VectorizeVersion = 'v1' | 'v2'; +type QueryImplV2Params = + | { vector: VectorFloatArray | number[]; vectorId?: undefined } + | { vector?: undefined; vectorId: string }; + function toNdJson(arr: object[]): string { return arr.reduce((acc, o) => acc + JSON.stringify(o) + '\n', '').trim(); } @@ -50,35 +54,10 @@ class VectorizeIndexImpl implements Vectorize { options?: VectorizeQueryOptions ): Promise { if (this.indexVersion === 'v2') { - if (options?.returnMetadata) { - if ( - typeof options.returnMetadata !== 'boolean' && - !isVectorizeMetadataRetrievalLevel(options.returnMetadata) - ) { - throw new Error( - `Invalid returnMetadata option. Expected: true, false, "none", "indexed" or "all"; got: ${options.returnMetadata}` - ); - } - - if (typeof options.returnMetadata === 'boolean') { - // Allow boolean returnMetadata for backward compatibility. 
true converts to 'all' and false converts to 'none' - // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition - options.returnMetadata = options.returnMetadata ? 'all' : 'none'; - } - } - const res = await this._send(Operation.VECTOR_QUERY, `query`, { - method: 'POST', - body: JSON.stringify({ - ...options, - vector: Array.isArray(vector) ? vector : Array.from(vector), - }), - headers: { - 'content-type': 'application/json', - accept: 'application/json', - }, - }); - - return await toJson(res); + return await this.queryImplV2( + { vector: Array.isArray(vector) ? vector : Array.from(vector) }, + options + ); } else { if ( options && @@ -114,6 +93,17 @@ class VectorizeIndexImpl implements Vectorize { } } + public async queryById( + vectorId: string, + options?: VectorizeQueryOptions + ): Promise { + if (this.indexVersion === 'v1') { + throw new Error(`QueryById operation is not supported for v1 indexes.`); + } else { + return await this.queryImplV2({ vectorId }, options); + } + } + public async insert( vectors: VectorizeVector[] ): Promise { @@ -262,6 +252,43 @@ class VectorizeIndexImpl implements Vectorize { return res; } + + private async queryImplV2( + vectorParams: QueryImplV2Params, + options?: VectorizeQueryOptions + ): Promise { + if (options?.returnMetadata) { + if ( + typeof options.returnMetadata !== 'boolean' && + !isVectorizeMetadataRetrievalLevel(options.returnMetadata) + ) { + throw new Error( + `Invalid returnMetadata option. Expected: true, false, "none", "indexed" or "all"; got: ${options.returnMetadata}` + ); + } + + if (typeof options.returnMetadata === 'boolean') { + // Allow boolean returnMetadata for backward compatibility. true converts to 'all' and false converts to 'none' + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + options.returnMetadata = options.returnMetadata ? 
'all' : 'none'; + } + } + const res = await this._send(Operation.VECTOR_QUERY, `query`, { + method: 'POST', + body: JSON.stringify({ + ...options, + ...(vectorParams.vector + ? { vector: vectorParams.vector } + : { vectorId: vectorParams.vectorId }), + }), + headers: { + 'content-type': 'application/json', + accept: 'application/json', + }, + }); + + return await toJson(res); + } } function isVectorizeMetadataRetrievalLevel(value: unknown): boolean { diff --git a/src/cloudflare/internal/vectorize.d.ts b/src/cloudflare/internal/vectorize.d.ts index 8f9a101ac6b..765264c3e16 100644 --- a/src/cloudflare/internal/vectorize.d.ts +++ b/src/cloudflare/internal/vectorize.d.ts @@ -243,6 +243,16 @@ declare abstract class Vectorize { vector: VectorFloatArray | number[], options?: VectorizeQueryOptions ): Promise<VectorizeMatches>; /** + * Use the provided vector-id to perform a similarity search across the index. + * @param vectorId Id for a vector in the index against which the index should be queried. + * @param options Configuration options to massage the returned data. + * @returns A promise that resolves with matched and scored vectors. + */ + public queryById( + vectorId: string, + options?: VectorizeQueryOptions + ): Promise<VectorizeMatches>; /** * Insert a list of vectors into the index dataset. If a provided id exists, an error will be thrown. * @param vectors List of vectors that will be inserted. diff --git a/types/defines/vectorize.d.ts b/types/defines/vectorize.d.ts index 3450933af0c..ac99b2a7ef7 100644 --- a/types/defines/vectorize.d.ts +++ b/types/defines/vectorize.d.ts @@ -235,6 +235,16 @@ declare abstract class Vectorize { vector: VectorFloatArray | number[], options?: VectorizeQueryOptions ): Promise<VectorizeMatches>; /** + * Use the provided vector-id to perform a similarity search across the index. + * @param vectorId Id for a vector in the index against which the index should be queried. + * @param options Configuration options to massage the returned data. 
+ * @returns A promise that resolves with matched and scored vectors. + */ + public queryById( + vectorId: string, + options?: VectorizeQueryOptions + ): Promise<VectorizeMatches>; /** * Insert a list of vectors into the index dataset. If a provided id exists, an error will be thrown. * @param vectors List of vectors that will be inserted.