Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.Locale;
import java.util.Map;

import static org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilterTestUtil.randomInputCasesForSemanticText;
import static org.hamcrest.Matchers.equalTo;

public class ShardBulkInferenceActionFilterIT extends ESIntegTestCase {
Expand Down Expand Up @@ -93,8 +94,8 @@ public void testBulkOperations() throws Exception {
String id = Long.toString(totalDocs);
boolean isIndexRequest = randomBoolean();
Map<String, Object> source = new HashMap<>();
source.put("sparse_field", isIndexRequest && rarely() ? null : randomAlphaOfLengthBetween(0, 1000));
source.put("dense_field", isIndexRequest && rarely() ? null : randomAlphaOfLengthBetween(0, 1000));
source.put("sparse_field", isIndexRequest && rarely() ? null : randomInputCasesForSemanticText());
source.put("dense_field", isIndexRequest && rarely() ? null : randomInputCasesForSemanticText());
if (isIndexRequest) {
bulkReqBuilder.add(new IndexRequestBuilder(client()).setIndex(INDEX_NAME).setId(id).setSource(source));
totalDocs++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -492,12 +492,16 @@ private Map<String, List<FieldInferenceRequest>> createFieldInferenceRequests(Bu
* If {@code valueObj} is not a string or a collection of strings, it throws an ElasticsearchStatusException.
*/
private static List<String> nodeStringValues(String field, Object valueObj) {
if (valueObj instanceof String value) {
if (valueObj instanceof Number || valueObj instanceof Boolean) {
return List.of(valueObj.toString());
} else if (valueObj instanceof String value) {
return List.of(value);
} else if (valueObj instanceof Collection<?> values) {
List<String> valuesString = new ArrayList<>();
for (var v : values) {
if (v instanceof String value) {
if (v instanceof Number || v instanceof Boolean) {
valuesString.add(v.toString());
} else if (v instanceof String value) {
valuesString.add(value);
} else {
throw new ElasticsearchStatusException(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.action.filter;

import static org.elasticsearch.test.ESTestCase.randomAlphaOfLengthBetween;
import static org.elasticsearch.test.ESTestCase.randomBoolean;
import static org.elasticsearch.test.ESTestCase.randomDouble;
import static org.elasticsearch.test.ESTestCase.randomFloat;
import static org.elasticsearch.test.ESTestCase.randomInt;
import static org.elasticsearch.test.ESTestCase.randomIntBetween;
import static org.elasticsearch.test.ESTestCase.randomLong;

public class ShardBulkInferenceActionFilterTestUtil {

    /**
     * Returns a randomly generated input value of one of the types accepted by semantic text
     * fields: a string, int, long, float, boolean, or double.
     */
    public static Object randomInputCasesForSemanticText() {
        // Upper bound must be 5 so the default (double) branch is reachable: ESTestCase's
        // randomIntBetween is inclusive on both ends, and with a bound of 4 every value was
        // consumed by the explicit cases, leaving randomDouble() as dead code.
        return switch (randomIntBetween(0, 5)) {
            case 0 -> randomAlphaOfLengthBetween(10, 20);
            case 1 -> randomInt();
            case 2 -> randomLong();
            case 3 -> randomFloat();
            case 4 -> randomBoolean();
            default -> randomDouble();
        };
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults;
import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults;
import org.elasticsearch.xpack.inference.mapper.SemanticTextField;
import org.elasticsearch.xpack.inference.model.TestModel;
import org.elasticsearch.xpack.inference.registry.ModelRegistry;
import org.junit.After;
Expand All @@ -55,8 +56,10 @@
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.awaitLatch;
import static org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilter.DEFAULT_BATCH_SIZE;
import static org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilter.getIndexRequestOrNull;
import static org.elasticsearch.xpack.inference.action.filter.ShardBulkInferenceActionFilterTestUtil.randomInputCasesForSemanticText;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticText;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSparseEmbeddings;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.semanticTextFieldFromChunkedInferenceResults;
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.toChunkedResult;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
Expand Down Expand Up @@ -331,16 +334,31 @@ private static BulkItemRequest[] randomBulkItemRequest(
for (var entry : fieldInferenceMap.values()) {
String field = entry.getName();
var model = modelMap.get(entry.getInferenceId());
String text = randomAlphaOfLengthBetween(10, 20);
docMap.put(field, text);
expectedDocMap.put(field, text);
Object inputObject = randomInputCasesForSemanticText();
String inputText = inputObject.toString();
docMap.put(field, inputObject);
expectedDocMap.put(field, inputText);
if (model == null) {
// ignore results, the doc should fail with a resource not found exception
continue;
}
var result = randomSemanticText(field, model, List.of(text), requestContentType);
model.putResult(text, toChunkedResult(result));
expectedDocMap.put(field, result);

SemanticTextField semanticTextField;
if (model.hasResult(inputText)) {
ChunkedInferenceServiceResults results = model.getResults(inputText);
semanticTextField = semanticTextFieldFromChunkedInferenceResults(
field,
model,
List.of(inputText),
results,
requestContentType
);
} else {
semanticTextField = randomSemanticText(field, model, List.of(inputText), requestContentType);
model.putResult(inputText, toChunkedResult(semanticTextField));
}
Comment on lines +350 to +362
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@carlosdelest We had to make this change because the inference result cache in model is not field-aware. Now that our input can be of many data types (including Boolean, with only two values), we are nearly guaranteed to hit value collisions across 100+ bulk requests. This caused test failures with the previous logic because different random embeddings would be generated every time we saw the value "true" (for example).

This updated logic checks if the inference result cache already has results for the value, and uses them if it does.

Copy link
Member

@carlosdelest carlosdelest Jul 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see - we could maybe have used ESTestCase.randomValueOtherThanMany() to a similar effect. That would ensure that the random value is not in the model, rather than just trying twice - AFAIU we should loop until we find a value that is not already in the results?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'm misunderstanding your comment, but I don't think ESTestCase.randomValueOtherThanMany() would help here. The issue is that the previous logic always generated a new embedding for the input, regardless of whether the model already had a cached value for that input. This caused test failures. Consider the following case:

  • We generate a random embedding for the input true
  • We write that embedding to the expected doc map and cache it in model
  • In a later bulk request, the input true is randomly generated again
  • We generate a different random embedding for the input true
  • We overwrite the cached embedding in model with the new embedding
  • After all requests are generated, we assert that the embedding in the expected doc map matches that in the model cache. This fails because the embedding in the model cache was overwritten.

This new logic fixes the problem by first checking if model already has a cached embedding for the input. If it does, we use it. If it doesn't, we generate a new random embedding and add it to the model cache.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point is, wouldn't it be simpler not to generate the duplicate input values, and thus avoid managing the results as it happens?


expectedDocMap.put(field, semanticTextField);
}

int requestId = randomIntBetween(0, Integer.MAX_VALUE);
Expand Down Expand Up @@ -383,5 +401,9 @@ ChunkedInferenceServiceResults getResults(String text) {
// Caches (or overwrites) the canned inference result keyed by the exact input text.
// NOTE(review): the cache is keyed by text only, not by field — collisions across fields
// share one result (see the PR discussion about duplicate random inputs).
void putResult(String text, ChunkedInferenceServiceResults result) {
resultMap.put(text, result);
}

// True if a canned inference result has already been cached for this input text,
// letting callers reuse it instead of generating a new random embedding.
boolean hasResult(String text) {
return resultMap.containsKey(text);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,16 @@ public static SemanticTextField randomSemanticText(String fieldName, Model model
case SPARSE_EMBEDDING -> randomSparseEmbeddings(inputs);
default -> throw new AssertionError("invalid task type: " + model.getTaskType().name());
};
return semanticTextFieldFromChunkedInferenceResults(fieldName, model, inputs, results, contentType);
}

public static SemanticTextField semanticTextFieldFromChunkedInferenceResults(
String fieldName,
Model model,
List<String> inputs,
ChunkedInferenceServiceResults results,
XContentType contentType
) {
return new SemanticTextField(
fieldName,
inputs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,93 @@ setup:
- match: { _source.dense_field.inference.chunks.0.text: "another inference test" }
- match: { _source.non_inference_field: "non inference test" }

---
"Calculates text expansion and embedding results for new documents with integer value":
  # Numeric inputs to semantic text fields are stringified before inference,
  # so the stored text and the chunk text are the string forms of the numbers.
  - do:
      index:
        index: test-index
        id: doc_1
        body:
          sparse_field: 75
          dense_field: 100

  - do:
      get:
        index: test-index
        id: doc_1

  - match: { _source.sparse_field.text: "75" }
  - exists: _source.sparse_field.inference.chunks.0.embeddings
  - match: { _source.sparse_field.inference.chunks.0.text: "75" }
  - match: { _source.dense_field.text: "100" }
  - exists: _source.dense_field.inference.chunks.0.embeddings
  - match: { _source.dense_field.inference.chunks.0.text: "100" }

---
"Calculates text expansion and embedding results for new documents with boolean value":
  # Boolean inputs are stringified ("true"/"false") before inference, matching the
  # Number/Boolean handling in ShardBulkInferenceActionFilter#nodeStringValues.
  - do:
      index:
        index: test-index
        id: doc_1
        body:
          sparse_field: true
          dense_field: false

  - do:
      get:
        index: test-index
        id: doc_1

  - match: { _source.sparse_field.text: "true" }
  - exists: _source.sparse_field.inference.chunks.0.embeddings
  - match: { _source.sparse_field.inference.chunks.0.text: "true" }
  - match: { _source.dense_field.text: "false" }
  - exists: _source.dense_field.inference.chunks.0.embeddings
  - match: { _source.dense_field.inference.chunks.0.text: "false" }

---
"Calculates text expansion and embedding results for new documents with collection":
  # Mixed-type arrays are accepted: each element (boolean, integer, string, float)
  # is converted to its string form and embedded as a separate chunk, preserving order.
  - do:
      index:
        index: test-index
        id: doc_1
        body:
          sparse_field: [false, 75, "inference test", 13.49]
          dense_field: [true, 49.99, "another inference test", 5654]

  - do:
      get:
        index: test-index
        id: doc_1

  - length: { _source.sparse_field.text: 4 }
  - match: { _source.sparse_field.text.0: "false" }
  - match: { _source.sparse_field.text.1: "75" }
  - match: { _source.sparse_field.text.2: "inference test" }
  - match: { _source.sparse_field.text.3: "13.49" }
  - exists: _source.sparse_field.inference.chunks.0.embeddings
  - exists: _source.sparse_field.inference.chunks.1.embeddings
  - exists: _source.sparse_field.inference.chunks.2.embeddings
  - exists: _source.sparse_field.inference.chunks.3.embeddings
  - match: { _source.sparse_field.inference.chunks.0.text: "false" }
  - match: { _source.sparse_field.inference.chunks.1.text: "75" }
  - match: { _source.sparse_field.inference.chunks.2.text: "inference test" }
  - match: { _source.sparse_field.inference.chunks.3.text: "13.49" }

  - length: { _source.dense_field.text: 4 }
  - match: { _source.dense_field.text.0: "true" }
  - match: { _source.dense_field.text.1: "49.99" }
  - match: { _source.dense_field.text.2: "another inference test" }
  - match: { _source.dense_field.text.3: "5654" }
  - exists: _source.dense_field.inference.chunks.0.embeddings
  - exists: _source.dense_field.inference.chunks.1.embeddings
  - exists: _source.dense_field.inference.chunks.2.embeddings
  - exists: _source.dense_field.inference.chunks.3.embeddings
  - match: { _source.dense_field.inference.chunks.0.text: "true" }
  - match: { _source.dense_field.inference.chunks.1.text: "49.99" }
  - match: { _source.dense_field.inference.chunks.2.text: "another inference test" }
  - match: { _source.dense_field.inference.chunks.3.text: "5654" }

---
"Inference fields do not create new mappings":
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,64 @@ setup:
- close_to: { hits.hits.0._score: { value: 3.7837332e17, error: 1e10 } }
- length: { hits.hits.0._source.inference_field.inference.chunks: 2 }

---
"Numeric query using a sparse embedding model":
  # Indexing numeric array elements and querying with the stringified value must match.
  - skip:
      # NOTE(review): close_to is not used by this test's assertions; presumably listed
      # for consistency with sibling tests — confirm before removing.
      features: [ "headers", "close_to" ]

  - do:
      index:
        index: test-sparse-index
        id: doc_1
        body:
          inference_field: [40, 49.678]
        refresh: true

  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the floating-point score as a double
        Content-Type: application/json
      search:
        index: test-sparse-index
        body:
          query:
            semantic:
              field: "inference_field"
              query: "40"

  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "doc_1" }
  - length: { hits.hits.0._source.inference_field.inference.chunks: 2 }

---
"Boolean query using a sparse embedding model":
- skip:
features: [ "headers", "close_to" ]

- do:
index:
index: test-sparse-index
id: doc_1
body:
inference_field: true
refresh: true

- do:
headers:
# Force JSON content type so that we use a parser that interprets the floating-point score as a double
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is content type needed here, as we're using booleans?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a copy-paste from other YAML tests so that we can compare scores using the YAML assertions. Fun fact: If the test uses the SMILE format (which it will randomly do, unless you force JSON like is done here), then scores in search responses will be parsed as float, breaking the ability to check them using YAML assertions (which take double values).

We don't compare scores in this particular test, but it should be harmless to leave this so as to not create a landmine if we add score comparison in the future.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it - thought it was referring to the actual values we were indexing instead of the score, as I saw no scores involved in the test.

I'd say if it's not needed, don't add it - it confused me and probably will confuse others 🤷

Content-Type: application/json
search:
index: test-sparse-index
body:
query:
semantic:
field: "inference_field"
query: "true"

- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0._source.inference_field.inference.chunks: 1 }

---
"Query using a dense embedding model":
- skip:
Expand Down Expand Up @@ -121,6 +179,64 @@ setup:
- close_to: { hits.hits.0._score: { value: 1.0, error: 0.0001 } }
- length: { hits.hits.0._source.inference_field.inference.chunks: 2 }

---
"Numeric query using a dense embedding model":
  # Two numeric array elements produce two chunks; the query uses the stringified value.
  - skip:
      # NOTE(review): close_to is not used by this test's assertions; presumably listed
      # for consistency with sibling tests — confirm before removing.
      features: [ "headers", "close_to" ]

  - do:
      index:
        index: test-dense-index
        id: doc_1
        body:
          inference_field: [45.1, 100]
        refresh: true

  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the floating-point score as a double
        Content-Type: application/json
      search:
        index: test-dense-index
        body:
          query:
            semantic:
              field: "inference_field"
              query: "45.1"

  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "doc_1" }
  - length: { hits.hits.0._source.inference_field.inference.chunks: 2 }

---
"Boolean query using a dense embedding model":
  # A boolean input is stringified ("false") at index time; querying with the same
  # string form must retrieve the single-chunk document.
  - skip:
      # NOTE(review): close_to is not used by this test's assertions; presumably listed
      # for consistency with sibling tests — confirm before removing.
      features: [ "headers", "close_to" ]

  - do:
      index:
        index: test-dense-index
        id: doc_1
        body:
          inference_field: false
        refresh: true

  - do:
      headers:
        # Force JSON content type so that we use a parser that interprets the floating-point score as a double
        Content-Type: application/json
      search:
        index: test-dense-index
        body:
          query:
            semantic:
              field: "inference_field"
              query: "false"

  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "doc_1" }
  - length: { hits.hits.0._source.inference_field.inference.chunks: 1 }

---
"Query using a dense embedding model that uses byte embeddings":
- skip:
Expand Down