Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,15 @@ A simple configuration can either be provided via Spring Boot's `application.yml
[source,yaml]
----
spring:
opensearch:
uris: <opensearch instance URIs>
username: <opensearch username>
password: <opensearch password>
indexName: <opensearch index name>
mappingJson: <JSON mapping for opensearch index>
# API key if needed, e.g. OpenAI
ai:
vectorstore:
opensearch:
uris: <opensearch instance URIs>
username: <opensearch username>
password: <opensearch password>
indexName: <opensearch index name>
mappingJson: <JSON mapping for opensearch index>
openai:
api:
key: <api-key>
Expand Down Expand Up @@ -124,21 +125,26 @@ You can use the following properties in your Spring Boot configuration to custom
|`spring.ai.vectorstore.opensearch.username`| Username for accessing the OpenSearch cluster. | -
|`spring.ai.vectorstore.opensearch.password`| Password for the specified username. | -
|`spring.ai.vectorstore.opensearch.indexName`| Name of the default index to be used within the OpenSearch cluster. | `spring-ai-document-index`
|`spring.ai.vectorstore.opensearch.useApproximateKnn`| Whether to use the approximate k-NN method for searches.
If true, the approximate k-NN method is used for faster searches and better performance at large scales. See link:https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn[Approximate k-NN search].
If false, the exact brute-force k-NN method is used for highly accurate searches. See link:https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/[Exact k-NN with scoring script]. | `false`
|`spring.ai.vectorstore.opensearch.initialize-schema`| Whether to initialize the backend schema for you. | `false`

|`spring.ai.vectorstore.opensearch.mappingJson`| JSON string defining the mapping for the index; specifies how documents and their
fields are stored and indexed. |
{
"properties":{
"embedding":{
"type":"knn_vector",
"dimension":1536
"type":"knn_vector",
"dimension":1536
}
}
}
|`spring.opensearch.aws.host`| Hostname of the OpenSearch instance. | -
|`spring.opensearch.aws.service-name`| AWS service name for the OpenSearch instance. | -
|`spring.opensearch.aws.access-key`| AWS access key for the OpenSearch instance. | -
|`spring.opensearch.aws.secret-key`| AWS secret key for the OpenSearch instance. | -
|`spring.opensearch.aws.region`| AWS region for the OpenSearch instance. | -
|`spring.ai.vectorstore.opensearch.aws.host`| Hostname of the OpenSearch instance. | -
|`spring.ai.vectorstore.opensearch.aws.service-name`| AWS service name for the OpenSearch instance. | -
|`spring.ai.vectorstore.opensearch.aws.access-key`| AWS access key for the OpenSearch instance. | -
|`spring.ai.vectorstore.opensearch.aws.secret-key`| AWS secret key for the OpenSearch instance. | -
|`spring.ai.vectorstore.opensearch.aws.region`| AWS region for the OpenSearch instance. | -
|===

=== Customizing OpenSearch Client Configuration
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.vectorstore.OpenSearchVectorStore;
import org.springframework.ai.vectorstore.OpenSearchVectorStoreOptions;
import org.springframework.boot.autoconfigure.AutoConfiguration;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
Expand All @@ -43,6 +44,10 @@
import java.util.List;
import java.util.Optional;

/**
* @author Jemin Huh
* @since 1.0.0
*/
@AutoConfiguration
@ConditionalOnClass({ OpenSearchVectorStore.class, EmbeddingModel.class, OpenSearchClient.class })
@EnableConfigurationProperties(OpenSearchVectorStoreProperties.class)
Expand All @@ -58,10 +63,12 @@ PropertiesOpenSearchConnectionDetails openSearchConnectionDetails(OpenSearchVect
@ConditionalOnMissingBean
OpenSearchVectorStore vectorStore(OpenSearchVectorStoreProperties properties, OpenSearchClient openSearchClient,
EmbeddingModel embeddingModel) {
var indexName = Optional.ofNullable(properties.getIndexName()).orElse(OpenSearchVectorStore.DEFAULT_INDEX_NAME);
var mappingJson = Optional.ofNullable(properties.getMappingJson())
.orElse(OpenSearchVectorStore.DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536);
return new OpenSearchVectorStore(indexName, openSearchClient, embeddingModel, mappingJson,
OpenSearchVectorStoreOptions openSearchVectorStoreOptions = new OpenSearchVectorStoreOptions();
Optional.ofNullable(properties.getIndexName()).ifPresent(openSearchVectorStoreOptions::setIndexName);
Optional.ofNullable(properties.getMappingJson()).ifPresent(openSearchVectorStoreOptions::setMappingJson);
Optional.ofNullable(properties.getUseApproximateKnn())
.ifPresent(openSearchVectorStoreOptions::setUseApproximateKnn);
return new OpenSearchVectorStore(openSearchClient, embeddingModel, openSearchVectorStoreOptions,
properties.isInitializeSchema());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class OpenSearchVectorStoreProperties extends CommonVectorStoreProperties

private String password;

private Boolean useApproximateKnn;

private String mappingJson;

private Aws aws = new Aws();
Expand Down Expand Up @@ -76,6 +78,14 @@ public String getMappingJson() {
return mappingJson;
}

/**
 * Returns the configured approximate k-NN flag.
 * @return the flag value, or {@code null} when the property has not been set
 */
public Boolean getUseApproximateKnn() {
    return this.useApproximateKnn;
}

/**
 * Sets whether approximate k-NN search should be used.
 * @param useApproximateKnn the flag value; {@code null} leaves the choice unset
 */
public void setUseApproximateKnn(Boolean useApproximateKnn) {
this.useApproximateKnn = useApproximateKnn;
}

/**
 * Sets the JSON string that defines the mapping of the OpenSearch index.
 * @param mappingJson the index mapping as JSON
 */
public void setMappingJson(String mappingJson) {
this.mappingJson = mappingJson;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2023-2024 the original author or authors.
* Copyright 2023 - 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -54,8 +54,6 @@ public class OpenSearchVectorStore implements VectorStore, InitializingBean {

private static final Logger logger = LoggerFactory.getLogger(OpenSearchVectorStore.class);

public static final String DEFAULT_INDEX_NAME = "spring-ai-document-index";

public static final String DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536 = """
{
"properties":{
Expand All @@ -67,6 +65,8 @@ public class OpenSearchVectorStore implements VectorStore, InitializingBean {
}
""";

private final OpenSearchVectorStoreOptions openSearchVectorStoreOptions;

private final EmbeddingModel embeddingModel;

private final OpenSearchClient openSearchClient;
Expand All @@ -75,43 +75,35 @@ public class OpenSearchVectorStore implements VectorStore, InitializingBean {

private final FilterExpressionConverter filterExpressionConverter;

private final String mappingJson;
private final String similarityFunction;

private String similarityFunction;
private final boolean isUseApproximateKnn;

private final boolean initializeSchema;

public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel,
boolean initializeSchema) {
this(openSearchClient, embeddingModel, DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536,
initializeSchema);
public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel) {
this(openSearchClient, embeddingModel, new OpenSearchVectorStoreOptions());
}

public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel, String mappingJson,
boolean initializeSchema) {
this(DEFAULT_INDEX_NAME, openSearchClient, embeddingModel, mappingJson, initializeSchema);
public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel,
OpenSearchVectorStoreOptions openSearchVectorStoreOptions) {
this(openSearchClient, embeddingModel, openSearchVectorStoreOptions, true);
}

public OpenSearchVectorStore(String index, OpenSearchClient openSearchClient, EmbeddingModel embeddingModel,
String mappingJson, boolean initializeSchema) {
Objects.requireNonNull(embeddingModel, "RestClient must not be null");
public OpenSearchVectorStore(OpenSearchClient openSearchClient, EmbeddingModel embeddingModel,
OpenSearchVectorStoreOptions openSearchVectorStoreOptions, boolean initializeSchema) {
Objects.requireNonNull(openSearchClient, "OpenSearchClient must not be null");
Objects.requireNonNull(embeddingModel, "EmbeddingModel must not be null");
this.openSearchClient = openSearchClient;
this.embeddingModel = embeddingModel;
this.index = index;
this.mappingJson = mappingJson;
this.openSearchVectorStoreOptions = openSearchVectorStoreOptions;
this.index = openSearchVectorStoreOptions.getIndexName();
this.filterExpressionConverter = new OpenSearchAiSearchFilterExpressionConverter();
// the potential functions for vector fields at
// https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/#spaces
this.similarityFunction = COSINE_SIMILARITY_FUNCTION;
this.similarityFunction = openSearchVectorStoreOptions.getSimilarity();
this.isUseApproximateKnn = openSearchVectorStoreOptions.isUseApproximateKnn();
this.initializeSchema = initializeSchema;
}

public OpenSearchVectorStore withSimilarityFunction(String similarityFunction) {
this.similarityFunction = similarityFunction;
return this;
}

@Override
public void add(List<Document> documents) {
BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder();
Expand Down Expand Up @@ -152,16 +144,40 @@ public List<Document> similaritySearch(SearchRequest searchRequest) {

public List<Document> similaritySearch(List<Double> embedding, int topK, double similarityThreshold,
Filter.Expression filterExpression) {
return similaritySearch(new org.opensearch.client.opensearch.core.SearchRequest.Builder()
.query(getOpenSearchSimilarityQuery(embedding, filterExpression))
float[] floatEmbedding = new float[embedding.size()];
for (int i = 0; i < embedding.size(); i++)
floatEmbedding[i] = embedding.get(i).floatValue();
return similaritySearch(
isUseApproximateKnn ? buildApproximateQuery(topK, similarityThreshold, filterExpression, floatEmbedding)
: buildExactQuery(embedding, topK, similarityThreshold, filterExpression));
}

/**
 * Builds the search request used when approximate k-NN search is enabled.
 * <p>
 * The request runs a {@code knn} query against the {@code embedding} field,
 * restricted by a query-string filter derived from the given filter expression, and
 * discards hits scoring below the similarity threshold.
 * @param topK number of neighbours requested from the k-NN query and maximum number
 * of hits returned
 * @param similarityThreshold minimum score a hit must reach to be returned
 * @param filterExpression metadata filter converted to an OpenSearch query string
 * @param floatEmbedding query vector
 * @return the assembled OpenSearch search request
 */
private org.opensearch.client.opensearch.core.SearchRequest buildApproximateQuery(int topK,
        double similarityThreshold, Filter.Expression filterExpression, float[] floatEmbedding) {
    Query filterQuery = Query.of(queryBuilder -> queryBuilder.queryString(
            queryStringBuilder -> queryStringBuilder.query(getOpenSearchQueryString(filterExpression))));
    Query knnQuery = Query.of(queryBuilder -> queryBuilder.knn(knnQueryBuilder -> knnQueryBuilder
        .filter(filterQuery)
        .field("embedding")
        .k(topK)
        .vector(floatEmbedding)));
    return new org.opensearch.client.opensearch.core.SearchRequest.Builder()
        .query(knnQuery)
        // k only controls how many neighbours the k-NN query collects; without an
        // explicit size OpenSearch returns its default page of 10 hits, so a topK
        // above 10 would be silently truncated. Mirrors the exact-query builder.
        .size(topK)
        .minScore(similarityThreshold)
        .build();
}

private org.opensearch.client.opensearch.core.SearchRequest buildExactQuery(List<Double> embedding, int topK,
double similarityThreshold, Filter.Expression filterExpression) {
return new org.opensearch.client.opensearch.core.SearchRequest.Builder()
.query(buildExactQuery(embedding, filterExpression))
.sort(sortOptionsBuilder -> sortOptionsBuilder
.score(scoreSortBuilder -> scoreSortBuilder.order(SortOrder.Desc)))
.size(topK)
.minScore(similarityThreshold)
.build());
.build();
}

private Query getOpenSearchSimilarityQuery(List<Double> embedding, Filter.Expression filterExpression) {
private Query buildExactQuery(List<Double> embedding, Filter.Expression filterExpression) {
return Query.of(queryBuilder -> queryBuilder.scriptScore(scriptScoreQueryBuilder -> {
scriptScoreQueryBuilder
.query(queryBuilder2 -> queryBuilder2.queryString(queryStringQuerybuilder -> queryStringQuerybuilder
Expand Down Expand Up @@ -218,11 +234,11 @@ public boolean exists(String targetIndex) {
}
}

private CreateIndexResponse createIndexMapping(String index, String mappingJson) {
private CreateIndexResponse createIndexMapping(String mappingJson) {
JsonpMapper jsonpMapper = openSearchClient._transport().jsonpMapper();
try {
return this.openSearchClient.indices()
.create(new CreateIndexRequest.Builder().index(index)
.create(new CreateIndexRequest.Builder().index(this.index)
.settings(settingsBuilder -> settingsBuilder.knn(true))
.mappings(TypeMapping._DESERIALIZER.deserialize(
jsonpMapper.jsonProvider().createParser(new StringReader(mappingJson)), jsonpMapper))
Expand All @@ -235,8 +251,30 @@ private CreateIndexResponse createIndexMapping(String index, String mappingJson)

@Override
public void afterPropertiesSet() {
/**
* Generates a JSON string for the k-NN vector mapping configuration. The
* knn_vector field allows k-NN vectors ingestion into OpenSearch and supports
* various k-NN searches.
* https://opensearch.org/docs/latest/search-plugins/knn/knn-index#method-definitions
*/
if (this.initializeSchema && !exists(this.index)) {
createIndexMapping(this.index, this.mappingJson);
createIndexMapping(Objects.requireNonNullElseGet(openSearchVectorStoreOptions.getMappingJson(),
() -> this.isUseApproximateKnn ? """
{
"properties": {
"embedding": {
"type": "knn_vector",
"dimension": "%d",
"method": {
"name": "hnsw",
"engine": "lucene",
"space_type": "%s"
}
}
}
}
""".formatted(this.openSearchVectorStoreOptions.getDimensions(), this.similarityFunction)
: DEFAULT_MAPPING_EMBEDDING_TYPE_KNN_VECTOR_DIMENSION_1536));
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright 2023 - 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.vectorstore;

/**
 * Mutable holder for the settings of an OpenSearch-backed vector store: index name,
 * vector dimensions, similarity function, k-NN search mode and an optional custom
 * index mapping. Every setting except the mapping starts with a default value.
 *
 * @author Jemin Huh
 * @since 1.0.0
 */
public class OpenSearchVectorStoreOptions {

    /** Index name used when none is configured. */
    private static final String DEFAULT_INDEX_NAME = "spring-ai-document-index";

    /** Vector dimensions used when none are configured. */
    private static final int DEFAULT_DIMENSIONS = 1536;

    /** Similarity function used when none is configured. */
    private static final String DEFAULT_SIMILARITY = "cosinesimil";

    /**
     * The name of the index to store the vectors.
     */
    private String indexName = DEFAULT_INDEX_NAME;

    /**
     * The number of dimensions in the vector.
     */
    private int dimensions = DEFAULT_DIMENSIONS;

    /**
     * The similarity function to use. The available functions for vector fields are
     * listed at
     * https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/#spaces
     */
    private String similarity = DEFAULT_SIMILARITY;

    /**
     * Indicates whether to use approximate kNN. If true, the approximate kNN method is
     * used for faster searches and maintains good performance even at large scales.
     * https://opensearch.org/docs/latest/search-plugins/knn/approximate-knn/ If false
     * (the default), the exact brute-force kNN method is used for precise and highly
     * accurate searches.
     * https://opensearch.org/docs/latest/search-plugins/knn/knn-score-script/
     */
    private boolean useApproximateKnn;

    /**
     * Optional JSON mapping for the index; {@code null} unless explicitly set.
     */
    private String mappingJson;

    /**
     * Returns the name of the index that stores the vectors.
     * @return the index name
     */
    public String getIndexName() {
        return this.indexName;
    }

    /**
     * Sets the name of the index that stores the vectors.
     * @param indexName the index name
     */
    public void setIndexName(String indexName) {
        this.indexName = indexName;
    }

    /**
     * Returns the number of dimensions in the vector.
     * @return the dimension count
     */
    public int getDimensions() {
        return this.dimensions;
    }

    /**
     * Sets the number of dimensions in the vector.
     * @param dims the dimension count
     */
    public void setDimensions(int dims) {
        this.dimensions = dims;
    }

    /**
     * Returns the similarity function name.
     * @return the similarity function
     */
    public String getSimilarity() {
        return this.similarity;
    }

    /**
     * Sets the similarity function name.
     * @param similarity the similarity function
     */
    public void setSimilarity(String similarity) {
        this.similarity = similarity;
    }

    /**
     * Tells whether approximate kNN search is enabled.
     * @return {@code true} for approximate kNN, {@code false} for exact kNN
     */
    public boolean isUseApproximateKnn() {
        return this.useApproximateKnn;
    }

    /**
     * Enables or disables approximate kNN search.
     * @param useApproximateKnn {@code true} for approximate kNN, {@code false} for
     * exact kNN
     */
    public void setUseApproximateKnn(boolean useApproximateKnn) {
        this.useApproximateKnn = useApproximateKnn;
    }

    /**
     * Returns the custom JSON mapping for the index.
     * @return the mapping JSON, or {@code null} if none was set
     */
    public String getMappingJson() {
        return this.mappingJson;
    }

    /**
     * Sets a custom JSON mapping for the index.
     * @param mappingJson the mapping JSON
     */
    public void setMappingJson(String mappingJson) {
        this.mappingJson = mappingJson;
    }

}
Loading