diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc index 187e60d95ea..457a1c61446 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/opensearch.adoc @@ -43,14 +43,15 @@ A simple configuration can either be provided via Spring Boot's `application.yml [source,yaml] ---- spring: - opensearch: - uris: - username: - password: - indexName: - mappingJson: # API key if needed, e.g. OpenAI ai: + vectorstore: + opensearch: + uris: + username: + password: + indexName: + mappingJson: openai: api: key: @@ -124,21 +125,26 @@ You can use the following properties in your Spring Boot configuration to custom |`spring.ai.vectorstore.opensearch.username`| Username for accessing the OpenSearch cluster. | - |`spring.ai.vectorstore.opensearch.password`| Password for the specified username. | - |`spring.ai.vectorstore.opensearch.indexName`| Name of the default index to be used within the OpenSearch cluster. | `spring-ai-document-index` +|`spring.ai.vectorstore.opensearch.useApproximateKnn`| Whether to use the approximate k-NN method for searches. +If true, the approximate k-NN method is used for faster searches and better performance at large scales. See link:https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn[Approximate k-NN search]. +If false, the exact brute-force k-NN method is used for highly accurate searches. See link:https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/[Exact k-NN with scoring script]. | `false` +|`spring.ai.vectorstore.opensearch.initialize-schema`| Whether to initialize the backend schema for you | `false` + |`spring.ai.vectorstore.opensearch.mappingJson`| JSON string defining the mapping for the index; specifies how documents and their fields are stored and indexed. 
| { "properties":{ "embedding":{ - "type":"knn_vector", - "dimension":1536 + "type":"knn_vector", + "dimension":1536 } } } -|`spring.opensearch.aws.host`| Hostname of the OpenSearch instance. | - -|`spring.opensearch.aws.service-name`| AWS service name for the OpenSearch instance. | - -|`spring.opensearch.aws.access-key`| AWS access key for the OpenSearch instance. | - -|`spring.opensearch.aws.secret-key`| AWS secret key for the OpenSearch instance. | - -|`spring.opensearch.aws.region`| AWS region for the OpenSearch instance. | - +|`spring.ai.vectorstore.opensearch.aws.host`| Hostname of the OpenSearch instance. | - +|`spring.ai.vectorstore.opensearch.aws.service-name`| AWS service name for the OpenSearch instance. | - +|`spring.ai.vectorstore.opensearch.aws.access-key`| AWS access key for the OpenSearch instance. | - +|`spring.ai.vectorstore.opensearch.aws.secret-key`| AWS secret key for the OpenSearch instance. | - +|`spring.ai.vectorstore.opensearch.aws.region`| AWS region for the OpenSearch instance. 
| - |=== === Customizing OpenSearch Client Configuration diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java index 35f922462b5..568de30e22c 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreAutoConfiguration.java @@ -26,6 +26,7 @@ import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder; import org.springframework.ai.embedding.EmbeddingModel; import org.springframework.ai.vectorstore.OpenSearchVectorStore; +import org.springframework.ai.vectorstore.OpenSearchVectorStoreOptions; import org.springframework.boot.autoconfigure.AutoConfiguration; import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; @@ -43,6 +44,10 @@ import java.util.List; import java.util.Optional; +/** + * @author Jemin Huh + * @since 1.0.0 + */ @AutoConfiguration @ConditionalOnClass({ OpenSearchVectorStore.class, EmbeddingModel.class, OpenSearchClient.class }) @EnableConfigurationProperties(OpenSearchVectorStoreProperties.class) @@ -58,10 +63,12 @@ PropertiesOpenSearchConnectionDetails openSearchConnectionDetails(OpenSearchVect @ConditionalOnMissingBean OpenSearchVectorStore vectorStore(OpenSearchVectorStoreProperties properties, OpenSearchClient openSearchClient, EmbeddingModel embeddingModel) { - var indexName = Optional.ofNullable(properties.getIndexName()).orElse(OpenSearchVectorStore.DEFAULT_INDEX_NAME); - var mappingJson = 
Optional.ofNullable(properties.getMappingJson()) - .orElse(OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536); - return new OpenSearchVectorStore(indexName, openSearchClient, embeddingModel, mappingJson, + OpenSearchVectorStoreOptions openSearchVectorStoreOptions = new OpenSearchVectorStoreOptions(); + Optional.ofNullable(properties.getIndexName()).ifPresent(openSearchVectorStoreOptions::setIndexName); + Optional.ofNullable(properties.getMappingJson()).ifPresent(openSearchVectorStoreOptions::setMappingJson); + Optional.ofNullable(properties.getUseApproximateKnn()) + .ifPresent(openSearchVectorStoreOptions::setUseApproximateKnn); + return new OpenSearchVectorStore(openSearchClient, embeddingModel, openSearchVectorStoreOptions, properties.isInitializeSchema()); } diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreProperties.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreProperties.java index a50c02ef655..579dd8616e1 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreProperties.java +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/opensearch/OpenSearchVectorStoreProperties.java @@ -36,6 +36,8 @@ public class OpenSearchVectorStoreProperties extends CommonVectorStoreProperties private String password; + private Boolean useApproximateKnn; + private String mappingJson; private Aws aws = new Aws(); @@ -76,6 +78,14 @@ public String getMappingJson() { return mappingJson; } + public Boolean getUseApproximateKnn() { + return useApproximateKnn; + } + + public void setUseApproximateKnn(Boolean useApproximateKnn) { + this.useApproximateKnn = useApproximateKnn; + } + public void setMappingJson(String mappingJson) { 
this.mappingJson = mappingJson; } diff --git a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java index 655c48fbdea..27f0f99adf0 100644 --- a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java +++ b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStore.java @@ -1,5 +1,5 @@ /* - * Copyright 2023-2024 the original author or authors. + * Copyright 2023 - 2024 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,8 +54,6 @@ public class OpenSearchVectorStore implements VectorStore, InitializingBean { private static final Logger logger = LoggerFactory.getLogger(OpenSearchVectorStore.class); - public static final String DEFAULT_INDEX_NAME = "spring-ai-document-index"; - public static final String DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536 = """ { "properties":{ @@ -67,6 +65,8 @@ public class OpenSearchVectorStore implements VectorStore, InitializingBean { } """; + private final OpenSearchVectorStoreOptions openSearchVectorStoreOptions; + private final EmbeddingModel embeddingModel; private final OpenSearchClient openSearchClient; @@ -75,43 +75,35 @@ public class OpenSearchVectorStore implements VectorStore, InitializingBean { private final FilterExpressionConverter filterExpressionConverter; - private final String mappingJson; + private final String similarityFunction; - private String similarityFunction; + private final boolean isUseApproximateKnn; private final boolean initializeSchema; - public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel, - boolean initializeSchema) { - 
this(openSearchClient, embeddingModel, DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536, - initializeSchema); + public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel) { + this(openSearchClient, embeddingModel, new OpenSearchVectorStoreOptions()); } - public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel, String mappingJson, - boolean initializeSchema) { - this(DEFAULT_INDEX_NAME, openSearchClient, embeddingModel, mappingJson, initializeSchema); + public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel, + OpenSearchVectorStoreOptions openSearchVectorStoreOptions) { + this(openSearchClient, embeddingModel, openSearchVectorStoreOptions, true); } - public OpenSearchVectorStore(String index, OpenSearchClient openSearchClient, EmbeddingModel embeddingModel, - String mappingJson, boolean initializeSchema) { - Objects.requireNonNull(embeddingModel, "RestClient must not be null"); + public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel, + OpenSearchVectorStoreOptions openSearchVectorStoreOptions, boolean initializeSchema) { + Objects.requireNonNull(openSearchClient, "OpenSearchClient must not be null"); Objects.requireNonNull(embeddingModel, "EmbeddingModel must not be null"); this.openSearchClient = openSearchClient; this.embeddingModel = embeddingModel; - this.index = index; - this.mappingJson = mappingJson; + this.openSearchVectorStoreOptions = openSearchVectorStoreOptions; + this.index = openSearchVectorStoreOptions.getIndexName(); this.filterExpressionConverter = new OpenSearchAiSearchFilterExpressionConverter(); - // the potential functions for vector fields at - // https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/#spaces - this.similarityFunction = COSINE_SIMILARITY_FUNCTION; + this.similarityFunction = openSearchVectorStoreOptions.getSimilarity(); + this.isUseApproximateKnn = 
openSearchVectorStoreOptions.isUseApproximateKnn(); this.initializeSchema = initializeSchema; } - public OpenSearchVectorStore withSimilarityFunction(String similarityFunction) { - this.similarityFunction = similarityFunction; - return this; - } - @Override public void add(List documents) { BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder(); @@ -152,16 +144,40 @@ public List similaritySearch(SearchRequest searchRequest) { public List similaritySearch(List embedding, int topK, double similarityThreshold, Filter.Expression filterExpression) { - return similaritySearch(new org.opensearch.client.opensearch.core.SearchRequest.Builder() - .query(getOpenSearchSimilarityQuery(embedding, filterExpression)) + float[] floatEmbedding = new float[embedding.size()]; + for (int i = 0; i < embedding.size(); i++) + floatEmbedding[i] = embedding.get(i).floatValue(); + return similaritySearch( + isUseApproximateKnn ? buildApproximateQuery(topK, similarityThreshold, filterExpression, floatEmbedding) + : buildExactQuery(embedding, topK, similarityThreshold, filterExpression)); + } + + private org.opensearch.client.opensearch.core.SearchRequest buildApproximateQuery(int topK, + double similarityThreshold, Filter.Expression filterExpression, float[] floatEmbedding) { + return new org.opensearch.client.opensearch.core.SearchRequest.Builder() + .query(Query.of(builder -> builder.knn(KnnQueryBuilder -> KnnQueryBuilder + .filter(Query + .of(queryBuilder -> queryBuilder.queryString(queryStringQuerybuilder -> queryStringQuerybuilder + .query(getOpenSearchQueryString(filterExpression))))) + .field("embedding") + .k(topK) + .vector(floatEmbedding)))) + .minScore(similarityThreshold) + .build(); + } + + private org.opensearch.client.opensearch.core.SearchRequest buildExactQuery(List embedding, int topK, + double similarityThreshold, Filter.Expression filterExpression) { + return new org.opensearch.client.opensearch.core.SearchRequest.Builder() + .query(buildExactQuery(embedding, 
filterExpression)) .sort(sortOptionsBuilder -> sortOptionsBuilder .score(scoreSortBuilder -> scoreSortBuilder.order(SortOrder.Desc))) .size(topK) .minScore(similarityThreshold) - .build()); + .build(); } - private Query getOpenSearchSimilarityQuery(List embedding, Filter.Expression filterExpression) { + private Query buildExactQuery(List embedding, Filter.Expression filterExpression) { return Query.of(queryBuilder -> queryBuilder.scriptScore(scriptScoreQueryBuilder -> { scriptScoreQueryBuilder .query(queryBuilder2 -> queryBuilder2.queryString(queryStringQuerybuilder -> queryStringQuerybuilder @@ -218,11 +234,11 @@ public boolean exists(String targetIndex) { } } - private CreateIndexResponse createIndexMapping(String index, String mappingJson) { + private CreateIndexResponse createIndexMapping(String mappingJson) { JsonpMapper jsonpMapper = openSearchClient._transport().jsonpMapper(); try { return this.openSearchClient.indices() - .create(new CreateIndexRequest.Builder().index(index) + .create(new CreateIndexRequest.Builder().index(this.index) .settings(settingsBuilder -> settingsBuilder.knn(true)) .mappings(TypeMapping._DESERIALIZER.deserialize( jsonpMapper.jsonProvider().createParser(new StringReader(mappingJson)), jsonpMapper)) @@ -235,8 +251,30 @@ private CreateIndexResponse createIndexMapping(String index, String mappingJson) @Override public void afterPropertiesSet() { + /** + * Generates a JSON string for the k-NN vector mapping configuration. The + * knn_vector field allows k-NN vectors ingestion into OpenSearch and supports + * various k-NN searches. + * https://opensearch.org/docs/latest/search-plugins/knn/knn-index#method-definitions + */ if (this.initializeSchema && !exists(this.index)) { - createIndexMapping(this.index, this.mappingJson); + createIndexMapping(Objects.requireNonNullElseGet(openSearchVectorStoreOptions.getMappingJson(), + () -> this.isUseApproximateKnn ? 
""" + { + "properties": { + "embedding": { + "type": "knn_vector", + "dimension": "%d", + "method": { + "name": "hnsw", + "engine": "lucene", + "space_type": "%s" + } + } + } + } + """.formatted(this.openSearchVectorStoreOptions.getDimensions(), this.similarityFunction) + : DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536)); } } diff --git a/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreOptions.java b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreOptions.java new file mode 100644 index 00000000000..f276e5ad906 --- /dev/null +++ b/vector-stores/spring-ai-opensearch-store/src/main/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreOptions.java @@ -0,0 +1,91 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.vectorstore; + +/** + * @author Jemin Huh + * @since 1.0.0 + */ +public class OpenSearchVectorStoreOptions { + + /** + * The name of the index to store the vectors. + */ + private String indexName = "spring-ai-document-index"; + + /** + * The number of dimensions in the vector. + */ + private int dimensions = 1536; + + /** + * The similarity function to use. 
See the supported space types for vector fields at + * https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/#spaces + */ + private String similarity = "cosinesimil"; + + /** + * Indicates whether to use approximate kNN. If true, the approximate kNN method is + * used for faster searches and maintains good performance even at large scales. + * https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/ If false, + * the exact brute-force kNN method is used for precise and highly accurate searches. + * https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/ + */ + private boolean useApproximateKnn = false; + + private String mappingJson; + + public String getIndexName() { + return this.indexName; + } + + public void setIndexName(String indexName) { + this.indexName = indexName; + } + + public int getDimensions() { + return this.dimensions; + } + + public void setDimensions(int dims) { + this.dimensions = dims; + } + + public String getSimilarity() { + return similarity; + } + + public void setSimilarity(String similarity) { + this.similarity = similarity; + } + + public boolean isUseApproximateKnn() { + return this.useApproximateKnn; + } + + public void setUseApproximateKnn(boolean useApproximateKnn) { + this.useApproximateKnn = useApproximateKnn; + } + + public String getMappingJson() { + return mappingJson; + } + + public void setMappingJson(String mappingJson) { + this.mappingJson = mappingJson; + } + +} diff --git a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java index 35077fc2a78..0e33fc04ace 100644 --- a/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java +++ 
b/vector-stores/spring-ai-opensearch-store/src/test/java/org/springframework/ai/vectorstore/OpenSearchVectorStoreIT.java @@ -1,5 +1,5 @@ /* - * Copyright 2023-2024 the original author or authors. + * Copyright 2023 - 2024 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -98,21 +98,18 @@ private ApplicationContextRunner getContextRunner() { @BeforeEach void cleanDatabase() { getContextRunner().run(context -> { - VectorStore vectorStore = context.getBean(VectorStore.class); + VectorStore vectorStore = context.getBean("vectorStore", VectorStore.class); vectorStore.delete(List.of("_all")); }); } @ParameterizedTest(name = "{0} : {displayName} ") - @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + @ValueSource(strings = { DEFAULT, "l2", "innerproduct" }) public void addAndSearchTest(String similarityFunction) { getContextRunner().run(context -> { - OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); - - if (!DEFAULT.equals(similarityFunction)) { - vectorStore.withSimilarityFunction(similarityFunction); - } + OpenSearchVectorStore vectorStore = context.getBean("vectorStore_" + similarityFunction, + OpenSearchVectorStore.class); vectorStore.add(documents); @@ -143,15 +140,12 @@ public void addAndSearchTest(String similarityFunction) { } @ParameterizedTest(name = "{0} : {displayName} ") - @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + @ValueSource(strings = { DEFAULT, "l2", "innerproduct" }) public void searchWithFilters(String similarityFunction) { getContextRunner().run(context -> { - OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); - - if (!DEFAULT.equals(similarityFunction)) { - vectorStore.withSimilarityFunction(similarityFunction); - } + OpenSearchVectorStore vectorStore = context.getBean("vectorStore_" + similarityFunction, + OpenSearchVectorStore.class); var 
bgDocument = new Document("1", "The World is Big and Salvation Lurks Around the Corner", Map.of("country", "BG", "year", 2020, "activationDate", new Date(1000))); @@ -241,14 +235,12 @@ public void searchWithFilters(String similarityFunction) { } @ParameterizedTest(name = "{0} : {displayName} ") - @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + @ValueSource(strings = { DEFAULT, "l2", "innerproduct" }) public void documentUpdateTest(String similarityFunction) { getContextRunner().run(context -> { - OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); - if (!DEFAULT.equals(similarityFunction)) { - vectorStore.withSimilarityFunction(similarityFunction); - } + OpenSearchVectorStore vectorStore = context.getBean("vectorStore_" + similarityFunction, + OpenSearchVectorStore.class); Document document = new Document(UUID.randomUUID().toString(), "Spring AI rocks!!", Map.of("meta1", "meta1")); @@ -297,14 +289,241 @@ public void documentUpdateTest(String similarityFunction) { } @ParameterizedTest(name = "{0} : {displayName} ") - @ValueSource(strings = { DEFAULT, "l1", "l2", "linf" }) + @ValueSource(strings = { DEFAULT, "l2", "innerproduct" }) public void searchThresholdTest(String similarityFunction) { getContextRunner().run(context -> { - OpenSearchVectorStore vectorStore = context.getBean(OpenSearchVectorStore.class); - if (!DEFAULT.equals(similarityFunction)) { - vectorStore.withSimilarityFunction(similarityFunction); - } + OpenSearchVectorStore vectorStore = context.getBean("vectorStore_" + similarityFunction, + OpenSearchVectorStore.class); + + vectorStore.add(documents); + + SearchRequest query = SearchRequest.query("Great Depression") + .withTopK(50) + .withSimilarityThreshold(SearchRequest.SIMILARITY_THRESHOLD_ACCEPT_ALL); + + Awaitility.await().until(() -> vectorStore.similaritySearch(query), hasSize(3)); + + List fullResult = vectorStore.similaritySearch(query); + + List distances = fullResult.stream().map(doc -> (Float) 
doc.getMetadata().get("distance")).toList(); + + assertThat(distances).hasSize(3); + + float threshold = (distances.get(0) + distances.get(1)) / 2; + + List results = vectorStore.similaritySearch( + SearchRequest.query("Great Depression").withTopK(50).withSimilarityThreshold(1 - threshold)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(documents.get(2).getId()); + assertThat(resultDoc.getContent()).contains("The Great Depression (1929–1939) was an economic shock"); + assertThat(resultDoc.getMetadata()).containsKey("meta2"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + // Remove all documents from the store + vectorStore.delete(documents.stream().map(Document::getId).toList()); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(50).withSimilarityThreshold(0)), + hasSize(0)); + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l2", "innerproduct" }) + public void approximateAddAndSearchTest(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean("vectorStore_approximate_" + similarityFunction, + OpenSearchVectorStore.class); + + vectorStore.add(documents); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)), + hasSize(1)); + + List results = vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(documents.get(2).getId()); + assertThat(resultDoc.getContent()).contains("The Great Depression (1929–1939) was an economic shock"); + assertThat(resultDoc.getMetadata()).hasSize(2); + 
assertThat(resultDoc.getMetadata()).containsKey("meta2"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + // Remove all documents from the store + vectorStore.delete(documents.stream().map(Document::getId).toList()); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1).withSimilarityThreshold(0)), + hasSize(0)); + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l2", "innerproduct" }) + public void approximateSearchWithFilters(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean("vectorStore_approximate_" + similarityFunction, + OpenSearchVectorStore.class); + + var bgDocument = new Document("1", "The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "BG", "year", 2020, "activationDate", new Date(1000))); + var nlDocument = new Document("2", "The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "NL", "activationDate", new Date(2000))); + var bgDocument2 = new Document("3", "The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "BG", "year", 2023, "activationDate", new Date(3000))); + + vectorStore.add(List.of(bgDocument, nlDocument, bgDocument2)); + + Awaitility.await() + .until(() -> vectorStore.similaritySearch(SearchRequest.query("The World").withTopK(5)), hasSize(3)); + + List results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country == 'NL'")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(nlDocument.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country == 'BG'")); + + assertThat(results).hasSize(2); + 
assertThat(results.get(0).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + assertThat(results.get(1).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country == 'BG' && year == 2020")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(bgDocument.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country in ['BG']")); + + assertThat(results).hasSize(2); + assertThat(results.get(0).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + assertThat(results.get(1).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country in ['BG','NL']")); + + assertThat(results).hasSize(3); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("country not in ['BG']")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(nlDocument.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("NOT(country not in ['BG'])")); + + assertThat(results).hasSize(2); + assertThat(results.get(0).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + assertThat(results.get(1).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression( + "activationDate > " + ZonedDateTime.parse("1970-01-01T00:00:02Z").toInstant().toEpochMilli())); + + assertThat(results).hasSize(1); + 
assertThat(results.get(0).getId()).isEqualTo(bgDocument2.getId()); + + // Remove all documents from the store + vectorStore.delete(documents.stream().map(Document::getId).toList()); + + Awaitility.await() + .until(() -> vectorStore.similaritySearch(SearchRequest.query("The World").withTopK(1)), hasSize(0)); + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l2", "innerproduct" }) + public void approximateDocumentUpdateTest(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean("vectorStore_approximate_" + similarityFunction, + OpenSearchVectorStore.class); + + Document document = new Document(UUID.randomUUID().toString(), "Spring AI rocks!!", + Map.of("meta1", "meta1")); + vectorStore.add(List.of(document)); + + Awaitility.await() + .until(() -> vectorStore + .similaritySearch(SearchRequest.query("Spring").withSimilarityThreshold(0).withTopK(5)), + hasSize(1)); + + List results = vectorStore + .similaritySearch(SearchRequest.query("Spring").withSimilarityThreshold(0).withTopK(5)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(document.getId()); + assertThat(resultDoc.getContent()).isEqualTo("Spring AI rocks!!"); + assertThat(resultDoc.getMetadata()).containsKey("meta1"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + Document sameIdDocument = new Document(document.getId(), + "The World is Big and Salvation Lurks Around the Corner", Map.of("meta2", "meta2")); + + vectorStore.add(List.of(sameIdDocument)); + SearchRequest fooBarSearchRequest = SearchRequest.query("FooBar").withTopK(5); + + Awaitility.await() + .until(() -> vectorStore.similaritySearch(fooBarSearchRequest).get(0).getContent(), + equalTo("The World is Big and Salvation Lurks Around the Corner")); + + results = vectorStore.similaritySearch(fooBarSearchRequest); + + assertThat(results).hasSize(1); + 
resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(document.getId()); + assertThat(resultDoc.getContent()).isEqualTo("The World is Big and Salvation Lurks Around the Corner"); + assertThat(resultDoc.getMetadata()).containsKey("meta2"); + assertThat(resultDoc.getMetadata()).containsKey("distance"); + + // Remove all documents from the store + vectorStore.delete(List.of(document.getId())); + + Awaitility.await().until(() -> vectorStore.similaritySearch(fooBarSearchRequest), hasSize(0)); + + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { DEFAULT, "l2", "innerproduct" }) + public void approximateAearchThresholdTest(String similarityFunction) { + + getContextRunner().run(context -> { + OpenSearchVectorStore vectorStore = context.getBean("vectorStore_approximate_" + similarityFunction, + OpenSearchVectorStore.class); vectorStore.add(documents); @@ -346,23 +565,79 @@ public void searchThresholdTest(String similarityFunction) { @EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class }) public static class TestApplication { + @Bean("vectorStore_" + DEFAULT) + public OpenSearchVectorStore vectorStoreDefault(OpenSearchClient openSearchClient, + EmbeddingModel embeddingModel) { + return new OpenSearchVectorStore(openSearchClient, embeddingModel); + } + + @Bean("vectorStore_l2") + public OpenSearchVectorStore vectorStoreL2(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel) { + OpenSearchVectorStoreOptions openSearchVectorStoreOptions = new OpenSearchVectorStoreOptions(); + openSearchVectorStoreOptions.setIndexName("index_l2"); + openSearchVectorStoreOptions.setSimilarity("l2"); + return new OpenSearchVectorStore(openSearchClient, embeddingModel, openSearchVectorStoreOptions); + } + + @Bean("vectorStore_innerproduct") + public OpenSearchVectorStore vectorStoreLinf(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel) { + OpenSearchVectorStoreOptions openSearchVectorStoreOptions 
= new OpenSearchVectorStoreOptions(); + openSearchVectorStoreOptions.setIndexName("index_innerproduct"); + openSearchVectorStoreOptions.setSimilarity("innerproduct"); + return new OpenSearchVectorStore(openSearchClient, embeddingModel, openSearchVectorStoreOptions); + } + + @Bean("vectorStore_approximate_" + DEFAULT) + public OpenSearchVectorStore vectorStoreApproximateDefault(OpenSearchClient openSearchClient, + EmbeddingModel embeddingModel) { + OpenSearchVectorStoreOptions openSearchVectorStoreOptions = new OpenSearchVectorStoreOptions(); + openSearchVectorStoreOptions.setIndexName("index_approximate_" + DEFAULT); + openSearchVectorStoreOptions.setUseApproximateKnn(true); + return new OpenSearchVectorStore(openSearchClient, embeddingModel, openSearchVectorStoreOptions); + } + + @Bean("vectorStore_approximate_l2") + public OpenSearchVectorStore vectorStoreApproximateL2(OpenSearchClient openSearchClient, + EmbeddingModel embeddingModel) { + OpenSearchVectorStoreOptions openSearchVectorStoreOptions = new OpenSearchVectorStoreOptions(); + openSearchVectorStoreOptions.setIndexName("index_approximate_l2"); + openSearchVectorStoreOptions.setSimilarity("l2"); + openSearchVectorStoreOptions.setUseApproximateKnn(true); + return new OpenSearchVectorStore(openSearchClient, embeddingModel, openSearchVectorStoreOptions); + } + + @Bean("vectorStore_approximate_innerproduct") + public OpenSearchVectorStore vectorStoreApproximateLinf(OpenSearchClient openSearchClient, + EmbeddingModel embeddingModel) { + OpenSearchVectorStoreOptions openSearchVectorStoreOptions = new OpenSearchVectorStoreOptions(); + openSearchVectorStoreOptions.setIndexName("index_approximate_innerproduct"); + openSearchVectorStoreOptions.setSimilarity("innerproduct"); + openSearchVectorStoreOptions.setUseApproximateKnn(true); + return new OpenSearchVectorStore(openSearchClient, embeddingModel, openSearchVectorStoreOptions); + } + + @Bean + public OpenSearchVectorStore vectorStore(OpenSearchClient 
openSearchClient, EmbeddingModel embeddingModel) { + return new OpenSearchVectorStore(openSearchClient, embeddingModel); + } + @Bean - public OpenSearchVectorStore vectorStore(EmbeddingModel embeddingModel) { + public EmbeddingModel embeddingModel() { + return new OpenAiEmbeddingModel(new OpenAiApi(System.getenv("OPENAI_API_KEY"))); + } + + @Bean + public OpenSearchClient openSearchClient() { try { - return new OpenSearchVectorStore(new OpenSearchClient(ApacheHttpClient5TransportBuilder + return new OpenSearchClient(ApacheHttpClient5TransportBuilder .builder(HttpHost.create(opensearchContainer.getHttpHostAddress())) - .build()), embeddingModel, true); + .build()); } catch (URISyntaxException e) { throw new RuntimeException(e); } } - @Bean - public EmbeddingModel embeddingModel() { - return new OpenAiEmbeddingModel(new OpenAiApi(System.getenv("OPENAI_API_KEY"))); - } - } }