diff --git a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java index 3a96fdc9b0e0e..a6a3d8546187f 100644 --- a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java @@ -33,6 +33,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore; +import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.index.search.ESToParentBlockJoinQuery; import org.elasticsearch.index.search.NestedHelper; @@ -260,6 +261,26 @@ protected int doHashCode() { @Override protected Query doToQuery(SearchExecutionContext context) throws IOException { + return toQuery((this.query::toQuery), path, scoreMode, ignoreUnmapped, context); + } + + /** + * Returns the primitive Lucene query for a nested query given the primitive query to wrap + * @param <E> exception that the queryProvider may throw + * @param queryProvider Retrieves the query to use given the SearchExecutionContext + * @param path nested path + * @param scoreMode score mode to use + * @param ignoreUnmapped whether to ignore unmapped fields + * @param context search execution context + * @return the primitive Lucene query + */ + public static <E extends Exception> Query toQuery( + CheckedFunction<SearchExecutionContext, Query, E> queryProvider, + String path, + ScoreMode scoreMode, + boolean ignoreUnmapped, + SearchExecutionContext context + ) throws E { if (context.allowExpensiveQueries() == false) { throw new ElasticsearchException( "[joining] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false." 
@@ -285,7 +306,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { try { context.nestedScope().nextLevel(mapper); - innerQuery = this.query.toQuery(context); + innerQuery = queryProvider.apply(context); } finally { context.nestedScope().previousLevel(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 84b41bf37db56..3a62428f237bc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.inference.mapper; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.search.join.ScoreMode; @@ -352,6 +353,21 @@ public Query termQuery(Object value, SearchExecutionContext context) { throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support term query"); } + @Override + public Query existsQuery(SearchExecutionContext context) { + if (getEmbeddingsField() == null) { + return new MatchNoDocsQuery(); + } + + return NestedQueryBuilder.toQuery( + (c -> getEmbeddingsField().fieldType().existsQuery(c)), + getChunksFieldName(name()), + ScoreMode.None, + false, + context + ); + } + @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { // Redirect the fetcher to load the original values of the field diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index 14de5ceffa6d4..1cae8d981313f 100644 --- 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; @@ -42,6 +43,7 @@ import org.elasticsearch.index.mapper.SourceToParse; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper; +import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.search.ESToParentBlockJoinQuery; import org.elasticsearch.inference.Model; import org.elasticsearch.inference.SimilarityMeasure; @@ -180,36 +182,10 @@ public void testDynamicUpdate() throws IOException { final String fieldName = "semantic"; final String inferenceId = "test_service"; - MapperService mapperService = createMapperService(mapping(b -> {})); - mapperService.merge( - "_doc", - new CompressedXContent( - Strings.toString(PutMappingRequest.simpleMapping(fieldName, "type=semantic_text,inference_id=" + inferenceId)) - ), - MapperService.MergeReason.MAPPING_UPDATE - ); - - SemanticTextField semanticTextField = new SemanticTextField( + MapperService mapperService = mapperServiceForFieldWithModelSettings( fieldName, - List.of(), - new SemanticTextField.InferenceResult( - inferenceId, - new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null), - List.of() - ), - XContentType.JSON - ); - XContentBuilder builder = JsonXContent.contentBuilder().startObject(); - builder.field(semanticTextField.fieldName()); - builder.value(semanticTextField); - builder.endObject(); - - SourceToParse 
sourceToParse = new SourceToParse("test", BytesReference.bytes(builder), XContentType.JSON); - ParsedDocument parsedDocument = mapperService.documentMapper().parse(sourceToParse); - mapperService.merge( - "_doc", - parsedDocument.dynamicMappingsUpdate().toCompressedXContent(), - MapperService.MergeReason.MAPPING_UPDATE + inferenceId, + new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) ); assertSemanticTextField(mapperService, fieldName, true); } @@ -565,6 +541,51 @@ private MapperService mapperServiceForFieldWithModelSettings( return mapperService; } + public void testExistsQuerySparseVector() throws IOException { + final String fieldName = "semantic"; + final String inferenceId = "test_service"; + + MapperService mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null) + ); + + Mapper mapper = mapperService.mappingLookup().getMapper(fieldName); + assertNotNull(mapper); + SearchExecutionContext searchExecutionContext = createSearchExecutionContext(mapperService); + Query existsQuery = ((SemanticTextFieldMapper) mapper).fieldType().existsQuery(searchExecutionContext); + assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); + } + + public void testExistsQueryDenseVector() throws IOException { + final String fieldName = "semantic"; + final String inferenceId = "test_service"; + + MapperService mapperService = mapperServiceForFieldWithModelSettings( + fieldName, + inferenceId, + new SemanticTextField.ModelSettings( + TaskType.TEXT_EMBEDDING, + 1024, + SimilarityMeasure.COSINE, + DenseVectorFieldMapper.ElementType.FLOAT + ) + ); + + Mapper mapper = mapperService.mappingLookup().getMapper(fieldName); + assertNotNull(mapper); + SearchExecutionContext searchExecutionContext = createSearchExecutionContext(mapperService); + Query existsQuery = ((SemanticTextFieldMapper) 
mapper).fieldType().existsQuery(searchExecutionContext); + assertThat(existsQuery, instanceOf(ESToParentBlockJoinQuery.class)); + } + + @Override + protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { + // Until a doc is indexed, the query is rewritten as match no docs + assertThat(query, instanceOf(MatchNoDocsQuery.class)); + } + private static void addSemanticTextMapping(XContentBuilder mappingBuilder, String fieldName, String modelId) throws IOException { mappingBuilder.startObject(fieldName); mappingBuilder.field("type", SemanticTextFieldMapper.CONTENT_TYPE); diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_semantic_text_exists_query.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_semantic_text_exists_query.yml new file mode 100644 index 0000000000000..11bd1f87aab06 --- /dev/null +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_semantic_text_exists_query.yml @@ -0,0 +1,144 @@ +setup: + - requires: + cluster_features: "gte_v8.15.0" + reason: semantic_text introduced in 8.15.0 + + - do: + inference.put: + task_type: sparse_embedding + inference_id: sparse-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + inference.put: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 10, + "api_key": "abc64", + "similarity": "COSINE" + }, + "task_settings": { + } + } + + - do: + indices.create: + index: test-sparse-index + body: + mappings: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + + - do: + indices.create: + index: test-dense-index + body: + mappings: + properties: + inference_field: + type: semantic_text + 
inference_id: dense-inference-id + +--- +"Exists query with no indexed documents": + - do: + search: + index: test-sparse-index + body: + query: + exists: + field: "inference_field" + + - match: { hits.total.value: 0 } + +--- +"Exists query with null indexed documents": + - do: + index: + index: test-sparse-index + id: doc + body: + inference_field: null + refresh: true + + - do: + search: + index: test-sparse-index + body: + query: + exists: + field: "inference_field" + + - match: { hits.total.value: 0 } + + - do: + index: + index: test-dense-index + id: doc + body: + inference_field: null + refresh: true + + - do: + search: + index: test-dense-index + body: + query: + exists: + field: "inference_field" + + - match: { hits.total.value: 0 } + +--- +"Exists query with indexed documents": + - do: + index: + index: test-sparse-index + id: doc + body: + inference_field: "hello world" + refresh: true + + - do: + search: + index: test-sparse-index + body: + query: + exists: + field: "inference_field" + + - match: { hits.total.value: 1 } + + - do: + index: + index: test-dense-index + id: doc + body: + inference_field: "hello world" + refresh: true + + - do: + search: + index: test-dense-index + body: + query: + exists: + field: "inference_field" + + - match: { hits.total.value: 1 }