diff --git a/docs/reference/query-dsl/script-score-query.asciidoc b/docs/reference/query-dsl/script-score-query.asciidoc index 401d323f6fff4..6b7411667f286 100644 --- a/docs/reference/query-dsl/script-score-query.asciidoc +++ b/docs/reference/query-dsl/script-score-query.asciidoc @@ -195,8 +195,16 @@ between a given query vector and document vectors. // NOTCONSOLE NOTE: If a document doesn't have a value for a vector field on which -a vector function is executed, 0 is returned as a result -for this document. +a vector function is executed, an error will be thrown. + +You can check if a document is missing a value for the field `my_vector` with +`doc['my_vector'].size() == 0`. Your overall script can look like this: + +[source,js] +-------------------------------------------------- +"source": "doc['my_vector'].size() == 0 ? 0 : cosineSimilarity(params.queryVector, doc['my_vector'])" +-------------------------------------------------- +// NOTCONSOLE NOTE: If a document's dense vector field has a number of dimensions different from the query's vector, an error will be thrown. 
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml index 22cfd5169686d..6d513609b1631 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/20_dense_vector_special_cases.yml @@ -131,7 +131,7 @@ setup: - match: { error.root_cause.0.type: "script_exception" } --- -"Distance functions for documents missing vector field should return 0": +"Documents missing a vector field": - do: index: index: test-index @@ -149,7 +149,9 @@ setup: - do: indices.refresh: {} +# expect an error when documents miss a vector field - do: + catch: bad_request headers: Content-Type: application/json search: @@ -162,6 +164,22 @@ setup: source: "cosineSimilarity(params.query_vector, doc['my_dense_vector'])" params: query_vector: [10.0, 10.0, 10.0] +- match: { error.root_cause.0.type: "script_exception" } + +# guard against missing values by checking size() +- do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "doc['my_dense_vector'].size() == 0 ? 
0 : cosineSimilarity(params.query_vector, doc['my_dense_vector'])" + params: + query_vector: [10.0, 10.0, 10.0] - match: {hits.total: 2} - match: {hits.hits.0._id: "1"} diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/40_sparse_vector_special_cases.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/40_sparse_vector_special_cases.yml index 615da302cae69..541d4c6c80254 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/40_sparse_vector_special_cases.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/vectors/40_sparse_vector_special_cases.yml @@ -87,7 +87,7 @@ setup: - match: {hits.hits.2._id: "3"} --- -"Distance functions for documents missing vector field should return 0": +"Documents missing a vector field": - do: index: index: test-index @@ -105,7 +105,9 @@ setup: - do: indices.refresh: {} +# expect an error when documents miss a vector field - do: + catch: bad_request headers: Content-Type: application/json search: @@ -118,6 +120,22 @@ setup: source: "cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector'])" params: query_vector: {"1": 10.0} +- match: { error.root_cause.0.type: "script_exception" } + +# guard against missing values by checking size() +- do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "doc['my_sparse_vector'].size() == 0 ? 
0 : cosineSimilaritySparse(params.query_vector, doc['my_sparse_vector'])" + params: + query_vector: {"1": 10.0} - match: {hits.total: 2} - match: {hits.hits.0._id: "1"} diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java index fcb02cee68822..34a1ae2c12a3c 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/ScoreScriptUtils.java @@ -28,7 +28,6 @@ public class ScoreScriptUtils { */ public static double dotProduct(List queryVector, VectorScriptDocValues.DenseVectorScriptDocValues dvs){ BytesRef value = dvs.getEncodedValue(); - if (value == null) return 0; float[] docVector = VectorEncoderDecoder.decodeDenseVector(value); if (queryVector.size() != docVector.length) { throw new IllegalArgumentException("Can't calculate dotProduct! The number of dimensions of the query vector [" + @@ -63,7 +62,6 @@ public CosineSimilarity(List queryVector) { public double cosineSimilarity(VectorScriptDocValues.DenseVectorScriptDocValues dvs) { BytesRef value = dvs.getEncodedValue(); - if (value == null) return 0; float[] docVector = VectorEncoderDecoder.decodeDenseVector(value); if (queryVector.size() != docVector.length) { throw new IllegalArgumentException("Can't calculate cosineSimilarity! 
The number of dimensions of the query vector [" + @@ -129,7 +127,6 @@ public DotProductSparse(Map queryVector) { public double dotProductSparse(VectorScriptDocValues.SparseVectorScriptDocValues dvs) { BytesRef value = dvs.getEncodedValue(); - if (value == null) return 0; int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(value); float[] docValues = VectorEncoderDecoder.decodeSparseVector(value); return intDotProductSparse(queryValues, queryDims, docValues, docDims); @@ -174,7 +171,6 @@ public CosineSimilaritySparse(Map queryVector) { public double cosineSimilaritySparse(VectorScriptDocValues.SparseVectorScriptDocValues dvs) { BytesRef value = dvs.getEncodedValue(); - if (value == null) return 0; int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(value); float[] docValues = VectorEncoderDecoder.decodeSparseVector(value); diff --git a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorScriptDocValues.java b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorScriptDocValues.java index 000caf74656eb..f22e7ad16ea50 100644 --- a/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorScriptDocValues.java +++ b/x-pack/plugin/vectors/src/main/java/org/elasticsearch/xpack/vectors/query/VectorScriptDocValues.java @@ -41,12 +41,16 @@ BytesRef getEncodedValue() { @Override public BytesRef get(int index) { - throw new UnsupportedOperationException("vector fields may only be used via vector functions in scripts"); + throw new UnsupportedOperationException("accessing a vector field's value through 'get' or 'value' is not supported"); } @Override public int size() { - throw new UnsupportedOperationException("vector fields may only be used via vector functions in scripts"); + if (value == null) { + return 0; + } else { + return 1; + } } // not final, as it needs to be extended by Mockito for tests