elastic · carlosdelest · Jul 1, 2024 · Jun 21, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java
@@ -33,6 +33,7 @@
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
+import org.elasticsearch.core.CheckedFunction;
 import org.elasticsearch.index.mapper.NestedObjectMapper;
 import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
 import org.elasticsearch.index.search.NestedHelper;
@@ -260,6 +261,26 @@ protected int doHashCode() {
 
     @Override
     protected Query doToQuery(SearchExecutionContext context) throws IOException {
+        return toQuery(this.query::toQuery, path, scoreMode, ignoreUnmapped, context);
+    }
+
+    /**
+     * Returns the primitive Lucene query for a nested query given the primitive query to wrap
+     * @param <E> exception that the queryProvider may throw
+     * @param queryProvider Retrieves the query to use given the SearchExecutionContext
+     * @param path nested path
+     * @param scoreMode score mode to use
+     * @param ignoreUnmapped whether to ignore unmapped fields
+     * @param context search execution context
+     * @return the primitive Lucene query
+     */
+    public static <E extends Exception> Query toQuery(
+        CheckedFunction<SearchExecutionContext, Query, E> queryProvider,
+        String path,
+        ScoreMode scoreMode,
+        boolean ignoreUnmapped,
+        SearchExecutionContext context
+    ) throws E {
         if (context.allowExpensiveQueries() == false) {
             throw new ElasticsearchException(
                 "[joining] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false."
@@ -285,7 +306,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
 
         try {
             context.nestedScope().nextLevel(mapper);
-            innerQuery = this.query.toQuery(context);
+            innerQuery = queryProvider.apply(context);
         } finally {
             context.nestedScope().previousLevel();
         }

diff --git a/...rence/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/...rence/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java
@@ -7,6 +7,7 @@
 
 package org.elasticsearch.xpack.inference.mapper;
 
+import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.join.BitSetProducer;
 import org.apache.lucene.search.join.ScoreMode;
@@ -352,6 +353,21 @@ public Query termQuery(Object value, SearchExecutionContext context) {
             throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support term query");
         }
 
+        @Override
+        public Query existsQuery(SearchExecutionContext context) {
+            // Only build the nested exists query when an embeddings field is available; otherwise nothing matches
+            if (getEmbeddingsField() != null) {
+                return NestedQueryBuilder.toQuery(
+                    innerContext -> getEmbeddingsField().fieldType().existsQuery(innerContext),
+                    getChunksFieldName(name()),
+                    ScoreMode.None,
+                    false,
+                    context
+                );
+            }
+            return new MatchNoDocsQuery();
+        }
+
         @Override
         public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
             // Redirect the fetcher to load the original values of the field

diff --git a/.../src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/.../src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java
@@ -13,6 +13,7 @@
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
@@ -42,6 +43,7 @@
 import org.elasticsearch.index.mapper.SourceToParse;
 import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
 import org.elasticsearch.index.mapper.vectors.SparseVectorFieldMapper;
+import org.elasticsearch.index.query.SearchExecutionContext;
 import org.elasticsearch.index.search.ESToParentBlockJoinQuery;
 import org.elasticsearch.inference.Model;
 import org.elasticsearch.inference.SimilarityMeasure;
@@ -180,36 +182,10 @@ public void testDynamicUpdate() throws IOException {
         final String fieldName = "semantic";
         final String inferenceId = "test_service";
 
-        MapperService mapperService = createMapperService(mapping(b -> {}));
-        mapperService.merge(
-            "_doc",
-            new CompressedXContent(
-                Strings.toString(PutMappingRequest.simpleMapping(fieldName, "type=semantic_text,inference_id=" + inferenceId))
-            ),
-            MapperService.MergeReason.MAPPING_UPDATE
-        );
-
-        SemanticTextField semanticTextField = new SemanticTextField(
+        MapperService mapperService = mapperServiceForFieldWithModelSettings(
             fieldName,
-            List.of(),
-            new SemanticTextField.InferenceResult(
-                inferenceId,
-                new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null),
-                List.of()
-            ),
-            XContentType.JSON
-        );
-        XContentBuilder builder = JsonXContent.contentBuilder().startObject();
-        builder.field(semanticTextField.fieldName());
-        builder.value(semanticTextField);
-        builder.endObject();
-
-        SourceToParse sourceToParse = new SourceToParse("test", BytesReference.bytes(builder), XContentType.JSON);
-        ParsedDocument parsedDocument = mapperService.documentMapper().parse(sourceToParse);
-        mapperService.merge(
-            "_doc",
-            parsedDocument.dynamicMappingsUpdate().toCompressedXContent(),
-            MapperService.MergeReason.MAPPING_UPDATE
+            inferenceId,
+            new SemanticTextField.ModelSettings(TaskType.SPARSE_EMBEDDING, null, null, null)
         );
         assertSemanticTextField(mapperService, fieldName, true);
     }
@@ -565,6 +541,51 @@ private MapperService mapperServiceForFieldWithModelSettings(
         return mapperService;
     }
 
+    public void testExistsQuerySparseVector() throws IOException {
+        // The exists query for a field configured with SPARSE_EMBEDDING model settings must be an ESToParentBlockJoinQuery
+        final String fieldName = "semantic";
+        final SemanticTextField.ModelSettings settings = new SemanticTextField.ModelSettings(
+            TaskType.SPARSE_EMBEDDING,
+            null,
+            null,
+            null
+        );
+        final MapperService mapperService = mapperServiceForFieldWithModelSettings(fieldName, "test_service", settings);
+        final Mapper semanticMapper = mapperService.mappingLookup().getMapper(fieldName);
+        assertNotNull(semanticMapper);
+        final SearchExecutionContext executionContext = createSearchExecutionContext(mapperService);
+        final Query query = ((SemanticTextFieldMapper) semanticMapper).fieldType().existsQuery(executionContext);
+        assertThat(query, instanceOf(ESToParentBlockJoinQuery.class));
+    }
+
+    public void testExistsQueryDenseVector() throws IOException {
+        // The exists query for a field configured with TEXT_EMBEDDING model settings
+        // must be an ESToParentBlockJoinQuery
+        final String fieldName = "semantic";
+
+        final SemanticTextField.ModelSettings settings = new SemanticTextField.ModelSettings(
+            TaskType.TEXT_EMBEDDING,
+            1024,
+            SimilarityMeasure.COSINE,
+            DenseVectorFieldMapper.ElementType.FLOAT
+        );
+        final MapperService mapperService = mapperServiceForFieldWithModelSettings(fieldName, "test_service", settings);
+
+        final Mapper semanticMapper = mapperService.mappingLookup().getMapper(fieldName);
+        assertNotNull(semanticMapper);
+
+        final SearchExecutionContext executionContext = createSearchExecutionContext(mapperService);
+        final Query query = ((SemanticTextFieldMapper) semanticMapper).fieldType().existsQuery(executionContext);
+        // Only the type of the resulting query is verified here
+        assertThat(query, instanceOf(ESToParentBlockJoinQuery.class));
+    }
+
+    @Override
+    protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
+        // Expect a match-no-docs query here; presumably no doc has been indexed yet, so no embeddings field exists (TODO confirm)
+        assertThat(query, instanceOf(MatchNoDocsQuery.class));
+    }
+
     private static void addSemanticTextMapping(XContentBuilder mappingBuilder, String fieldName, String modelId) throws IOException {
         mappingBuilder.startObject(fieldName);
         mappingBuilder.field("type", SemanticTextFieldMapper.CONTENT_TYPE);

diff --git a/...src/yamlRestTest/resources/rest-api-spec/test/inference/70_semantic_text_exists_query.yml b/...src/yamlRestTest/resources/rest-api-spec/test/inference/70_semantic_text_exists_query.yml
@@ -0,0 +1,144 @@
+setup:
+  - requires:
+      cluster_features: "gte_v8.15.0"
+      reason: semantic_text introduced in 8.15.0
+
+  - do:
+      inference.put:
+        task_type: sparse_embedding
+        inference_id: sparse-inference-id  # referenced by the test-sparse-index mapping below
+        body: >
+          {
+            "service": "test_service",
+            "service_settings": {
+              "model": "my_model",
+              "api_key": "abc64"
+            },
+            "task_settings": {
+            }
+          }
+
+  - do:
+      inference.put:
+        task_type: text_embedding
+        inference_id: dense-inference-id  # referenced by the test-dense-index mapping below
+        body: >
+          {
+            "service": "text_embedding_test_service",
+            "service_settings": {
+              "model": "my_model",
+              "dimensions": 10,
+              "api_key": "abc64",
+              "similarity": "COSINE"
+            },
+            "task_settings": {
+            }
+          }
+
+  - do:
+      indices.create:
+        index: test-sparse-index
+        body:
+          mappings:
+            properties:
+              inference_field:
+                type: semantic_text
+                inference_id: sparse-inference-id  # the sparse_embedding endpoint created above
+
+  - do:
+      indices.create:
+        index: test-dense-index
+        body:
+          mappings:
+            properties:
+              inference_field:
+                type: semantic_text
+                inference_id: dense-inference-id  # the text_embedding endpoint created above
+
+---
+"Exists query with no indexed documents":
+  - do:
+      search:
+        index: test-sparse-index
+        body:
+          query:
+            exists:
+              field: "inference_field"
+
+  - match: { hits.total.value: 0 }  # this test indexes nothing, so no hit is expected
+
+---
+"Exists query with null indexed documents":
+  - do:
+      index:
+        index: test-sparse-index
+        id: doc
+        body:
+          inference_field: null  # explicit null value for the semantic_text field
+        refresh: true
+
+  - do:
+      search:
+        index: test-sparse-index
+        body:
+          query:
+            exists:
+              field: "inference_field"
+
+  - match: { hits.total.value: 0 }  # a null value must not count as the field existing (sparse index)
+
+  - do:
+      index:
+        index: test-dense-index
+        id: doc
+        body:
+          inference_field: null  # same null document, now against the dense index
+        refresh: true
+
+  - do:
+      search:
+        index: test-dense-index
+        body:
+          query:
+            exists:
+              field: "inference_field"
+
+  - match: { hits.total.value: 0 }  # a null value must not count as the field existing (dense index)
+
+---
+"Exists query with indexed documents":
+  - do:
+      index:
+        index: test-sparse-index
+        id: doc
+        body:
+          inference_field: "hello world"  # non-null text value for the semantic_text field
+        refresh: true
+
+  - do:
+      search:
+        index: test-sparse-index
+        body:
+          query:
+            exists:
+              field: "inference_field"
+
+  - match: { hits.total.value: 1 }  # the single indexed document must match (sparse index)
+
+  - do:
+      index:
+        index: test-dense-index
+        id: doc
+        body:
+          inference_field: "hello world"  # same document, now against the dense index
+        refresh: true
+
+  - do:
+      search:
+        index: test-dense-index
+        body:
+          query:
+            exists:
+              field: "inference_field"
+
+  - match: { hits.total.value: 1 }  # the single indexed document must match (dense index)