From 987a7096abe57e58e020f9e76f5bf589bef0648d Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 09:06:37 +0200 Subject: [PATCH 01/26] Adding the TEXT_EMBEDDING_FUNCTION capability. --- .../elasticsearch/xpack/esql/action/EsqlCapabilities.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 6082b5e6e2576..7a87b9ec9d9c8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1314,6 +1314,11 @@ public enum Cap { */ KNN_FUNCTION_V5(Build.current().isSnapshot()), + /** + * Support for the {@code TEXT_EMBEDDING} function for generating dense vector embeddings. + */ + TEXT_EMBEDDING_FUNCTION(Build.current().isSnapshot()), + /** * Support for the LIKE operator with a list of wildcards. */ From 516a0b6daf4a34a4d4f43f0fed03ddd955a75d2a Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 09:13:44 +0200 Subject: [PATCH 02/26] Add InferenceFunction and TextEmbedding classes for TEXT_EMBEDDING function --- .../esql/expression/ExpressionWritables.java | 6 + .../function/inference/InferenceFunction.java | 41 +++++ .../inference/InferenceWritables.java | 36 ++++ .../function/inference/TextEmbedding.java | 164 ++++++++++++++++++ 4 files changed, 247 insertions(+) create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceFunction.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceWritables.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java index 2e06db66a85e5..07e3c73e666d7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java @@ -13,6 +13,7 @@ import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateWritables; import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextWritables; +import org.elasticsearch.xpack.esql.expression.function.inference.InferenceWritables; import org.elasticsearch.xpack.esql.expression.function.scalar.ScalarFunctionWritables; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FromBase64; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble; @@ -120,6 +121,7 @@ public static List getNamedWriteables() { entries.addAll(fullText()); entries.addAll(unaryScalars()); entries.addAll(vector()); + entries.addAll(inference()); return entries; } @@ -265,4 +267,8 @@ private static List fullText() { private static List vector() { return VectorWritables.getNamedWritables(); } + + private static List inference() { + return InferenceWritables.getNamedWritables(); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceFunction.java new file mode 100644 index 0000000000000..d2d6d9b6e2af7 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceFunction.java @@ -0,0 +1,41 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.inference; + +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.function.Function; +import org.elasticsearch.xpack.esql.core.tree.Source; + +import java.util.List; + +/** + * Base class for ESQL functions that use inference endpoints (e.g., TEXT_EMBEDDING). + */ +public abstract class InferenceFunction> extends Function { + + public static final String INFERENCE_ID_PARAMETER_NAME = "inference_id"; + + protected InferenceFunction(Source source, List children) { + super(source, children); + } + + /** The inference endpoint identifier expression. */ + public abstract Expression inferenceId(); + + /** The task type required by this function (e.g., TEXT_EMBEDDING). */ + public abstract TaskType taskType(); + + /** Returns a copy with inference resolution error for display to user. */ + public abstract PlanType withInferenceResolutionError(String inferenceId, String error); + + /** True if this function contains nested inference function calls. */ + public boolean hasNestedInferenceFunction() { + return anyMatch(e -> e instanceof InferenceFunction && e != this); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceWritables.java new file mode 100644 index 0000000000000..9809ef0d46b66 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceWritables.java @@ -0,0 +1,36 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.inference; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Defines the named writables for inference functions in ESQL. + */ +public final class InferenceWritables { + + private InferenceWritables() { + // Utility class + throw new UnsupportedOperationException(); + } + + public static List getNamedWritables() { + List entries = new ArrayList<>(); + + if (EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()) { + entries.add(TextEmbedding.ENTRY); + } + + return Collections.unmodifiableList(entries); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java new file mode 100644 index 0000000000000..a5ef509df1dff --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java @@ -0,0 +1,164 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.inference; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.inference.TaskType; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +/** + * TEXT_EMBEDDING function converts text to dense vector embeddings using an inference endpoint. + */ +public class TextEmbedding extends InferenceFunction { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "TextEmbedding", + TextEmbedding::new + ); + + private final Expression inferenceId; + private final Expression inputText; + + @FunctionInfo( + returnType = "dense_vector", + description = "Generates dense vector embeddings for text using a specified inference endpoint.", + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) }, + preview = true + ) + public TextEmbedding( + Source source, + @Param(name = "text", type = { "keyword", "text" }, description = "Text to embed") Expression inputText, + @Param( + name = InferenceFunction.INFERENCE_ID_PARAMETER_NAME, + type = { "keyword", "text" }, + description = "Identifier of the inference endpoint" + ) Expression inferenceId + ) { + super(source, List.of(inputText, inferenceId)); + this.inferenceId = inferenceId; + this.inputText = inputText; + } + + private TextEmbedding(StreamInput in) throws IOException { + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(inputText); + out.writeNamedWriteable(inferenceId); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + public Expression inputText() { + return inputText; + } + + @Override + public Expression inferenceId() { + return inferenceId; + } + + @Override + public boolean foldable() { + return inferenceId.foldable() && inputText.foldable(); + } + + @Override + public DataType dataType() { + return DataType.DENSE_VECTOR; + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + TypeResolution textResolution = isNotNull(inputText, sourceText(), FIRST).and(isFoldable(inputText, sourceText(), FIRST)) + .and(isString(inputText, sourceText(), FIRST)); + + if (textResolution.unresolved()) { + return textResolution; + } + + TypeResolution inferenceIdResolution = isNotNull(inferenceId, sourceText(), SECOND).and(isString(inferenceId, sourceText(), SECOND)) + .and(isFoldable(inferenceId, sourceText(), SECOND)); + + if (inferenceIdResolution.unresolved()) { + return inferenceIdResolution; + } + + return TypeResolution.TYPE_RESOLVED; + } + + @Override + public TaskType taskType() { + return TaskType.TEXT_EMBEDDING; + } + + @Override + public TextEmbedding withInferenceResolutionError(String inferenceId, String error) { + return new TextEmbedding(source(), inputText, new UnresolvedAttribute(inferenceId().source(), inferenceId, error)); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new TextEmbedding(source(), newChildren.get(0), newChildren.get(1)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, TextEmbedding::new, inputText, inferenceId); + } + + @Override + public String toString() { + return "TEXT_EMBEDDING(" + inputText + ", " + inferenceId + ")"; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + if (super.equals(o) == false) return false; + TextEmbedding textEmbedding = (TextEmbedding) o; + return Objects.equals(inferenceId, textEmbedding.inferenceId) && Objects.equals(inputText, textEmbedding.inputText); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), inferenceId, inputText); + } +} From 36df7cfd57cc0458b4e76d2896e9cb9ea287c9bc Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 09:19:28 +0200 Subject: [PATCH 03/26] Adding tests for the TextEmbedding function. --- .../inference/TextEmbeddingErrorTests.java | 74 +++++++++++++++++++ .../TextEmbeddingSerializationTests.java | 46 ++++++++++++ .../inference/TextEmbeddingTests.java | 72 ++++++++++++++++++ 3 files changed, 192 insertions(+) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingErrorTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingSerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingErrorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingErrorTests.java new file mode 100644 index 0000000000000..9af017bd5207f --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingErrorTests.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.inference; + +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.ErrorsForCasesWithoutExamplesTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.hamcrest.Matcher; +import org.junit.Before; + +import java.util.List; +import java.util.Locale; +import java.util.Set; + +import static org.hamcrest.Matchers.equalTo; + +/** Tests error conditions and type validation for TEXT_EMBEDDING function. */ +public class TextEmbeddingErrorTests extends ErrorsForCasesWithoutExamplesTestCase { + + @Before + public void checkCapability() { + assumeTrue("TEXT_EMBEDDING is not enabled", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + } + + @Override + protected List cases() { + return paramsToSuppliers(TextEmbeddingTests.parameters()); + } + + @Override + protected Expression build(Source source, List args) { + return new TextEmbedding(source, args.get(0), args.get(1)); + } + + @Override + protected Matcher expectedTypeErrorMatcher(List> validPerPosition, List signature) { + return equalTo(typeErrorMessage(true, validPerPosition, signature, (v, p) -> "string")); + } + + protected static String typeErrorMessage( + boolean includeOrdinal, + List> validPerPosition, + List signature, + AbstractFunctionTestCase.PositionalErrorMessageSupplier positionalErrorMessageSupplier + ) { + for (int i = 0; i < signature.size(); i++) { + if (signature.get(i) == DataType.NULL) { + String ordinal = includeOrdinal ? TypeResolutions.ParamOrdinal.fromIndex(i).name().toLowerCase(Locale.ROOT) + " " : ""; + return ordinal + "argument of [" + sourceForSignature(signature) + "] cannot be null, received []"; + } + + if (validPerPosition.get(i).contains(signature.get(i)) == false) { + break; + } + } + + return ErrorsForCasesWithoutExamplesTestCase.typeErrorMessage( + includeOrdinal, + validPerPosition, + signature, + positionalErrorMessageSupplier + ); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingSerializationTests.java new file mode 100644 index 0000000000000..5d7e1dfa4301a --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingSerializationTests.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.inference; + +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests; +import org.junit.Before; + +import java.io.IOException; + +/** Tests serialization/deserialization of TEXT_EMBEDDING function instances. */ +public class TextEmbeddingSerializationTests extends AbstractExpressionSerializationTests { + + @Before + public void checkCapability() { + assumeTrue("TEXT_EMBEDDING is not enabled", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + } + + @Override + protected TextEmbedding createTestInstance() { + Source source = randomSource(); + Expression inputText = randomChild(); + Expression inferenceId = randomChild(); + return new TextEmbedding(source, inputText, inferenceId); + } + + @Override + protected TextEmbedding mutateInstance(TextEmbedding instance) throws IOException { + Source source = instance.source(); + Expression inputText = instance.inputText(); + Expression inferenceId = instance.inferenceId(); + if (randomBoolean()) { + inputText = randomValueOtherThan(inputText, AbstractExpressionSerializationTests::randomChild); + } else { + inferenceId = randomValueOtherThan(inferenceId, AbstractExpressionSerializationTests::randomChild); + } + return new TextEmbedding(source, inputText, inferenceId); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java new file mode 100644 index 0000000000000..b6fdc7addf984 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.inference; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.FunctionName; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.hamcrest.Matchers; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; +import static org.hamcrest.Matchers.equalTo; + +@FunctionName("text_embedding") +public class TextEmbeddingTests extends AbstractFunctionTestCase { + @Before + public void checkCapability() { + assumeTrue("TEXT_EMBEDDING is not enabled", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + } + + public TextEmbeddingTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List suppliers = new ArrayList<>(); + + // Test all string type combinations for text input and inference endpoint ID + for (DataType inputTextDataType : DataType.stringTypes()) { + for (DataType inferenceIdDataType : DataType.stringTypes()) { + suppliers.add( + new TestCaseSupplier( + List.of(inputTextDataType, inferenceIdDataType), + () -> new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(randomBytesReference(10).toBytesRef(), inputTextDataType, "text"), + new TestCaseSupplier.TypedData(randomBytesReference(10).toBytesRef(), inferenceIdDataType, "inference_id") + ), + Matchers.blankOrNullString(), + DENSE_VECTOR, + equalTo(true) + ) + ) + ); + } + } + + return parameterSuppliersFromTypedData(suppliers); + } + + @Override + protected Expression build(Source source, List args) { + return new TextEmbedding(source, args.get(0), args.get(1)); + } +} From 918bdb71b4d15f4f18739fcf44bdd21779eaee24 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 09:21:23 +0200 Subject: [PATCH 04/26] Update ESQL usage tests --- .../yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index 72cb6efe42efc..a1211b9e62ff3 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -89,6 +89,7 @@ setup: - set: {esql.functions.to_long: functions_to_long} - set: {esql.functions.coalesce: functions_coalesce} - set: {esql.functions.categorize: functions_categorize} + - set: {esql.functions.text_embedding: functions_text_embedding} - do: esql.query: @@ -131,6 +132,7 @@ setup: - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - gt: {esql.functions.categorize: $functions_categorize} + - match: {esql.functions.text_embedding: $functions_text_embedding} # Testing for the entire function set isn't feasible, so we just check that we return the correct count as an approximation. # - length: {esql.functions: 179} # check the "sister" test below for a likely update to the same esql.functions length check --- From 4418a32a5b4fab19d620bedbd8a6ee7427c6725f Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 10:21:59 +0200 Subject: [PATCH 05/26] Add text_embedding to the EsqlFunctionRegistry --- .../xpack/esql/expression/function/EsqlFunctionRegistry.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 89ce24bd779b4..bf58f3750841f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -63,6 +63,7 @@ import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; +import org.elasticsearch.xpack.esql.expression.function.inference.TextEmbedding; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least; @@ -541,7 +542,8 @@ private static FunctionDefinition[][] snapshotFunctions() { def(Magnitude.class, Magnitude::new, "v_magnitude"), def(Hamming.class, Hamming::new, "v_hamming"), def(UrlEncode.class, UrlEncode::new, "url_encode"), - def(UrlDecode.class, UrlDecode::new, "url_decode") } }; + def(UrlDecode.class, UrlDecode::new, "url_decode"), + def(TextEmbedding.class, bi(TextEmbedding::new), "text_embedding")}}; } public EsqlFunctionRegistry snapshotRegistry() { From 4bb147d19bdec92b96bc931ece1fa98029699d18 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 10:23:25 +0200 Subject: [PATCH 06/26] Add text_embedding tests generated doc --- .../esql/images/functions/text_embedding.svg | 1 + .../esql/kibana/definition/functions/text_embedding.json | 9 +++++++++ .../esql/kibana/docs/functions/text_embedding.md | 4 ++++ 3 files changed, 14 insertions(+) create mode 100644 docs/reference/query-languages/esql/images/functions/text_embedding.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/text_embedding.md diff --git a/docs/reference/query-languages/esql/images/functions/text_embedding.svg b/docs/reference/query-languages/esql/images/functions/text_embedding.svg new file mode 100644 index 0000000000000..dab58c5e5bda0 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/text_embedding.svg @@ -0,0 +1 @@ +TEXT_EMBEDDING(text,inference_id) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json b/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json new file mode 100644 index 0000000000000..9e4967b92c367 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json @@ -0,0 +1,9 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "text_embedding", + "description" : "Generates dense vector embeddings for text using a specified inference endpoint.", + "signatures" : [ ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/text_embedding.md b/docs/reference/query-languages/esql/kibana/docs/functions/text_embedding.md new file mode 100644 index 0000000000000..bb3e74fc116cd --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/text_embedding.md @@ -0,0 +1,4 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### TEXT EMBEDDING +Generates dense vector embeddings for text using a specified inference endpoint. From ddf3db52397ad665f0b3753d6bcae27413b00e2c Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 10:24:48 +0200 Subject: [PATCH 07/26] InferenceResolver can now resolve inference ids used in a logical plan fron inference functions. --- .../esql/inference/InferenceResolver.java | 68 ++++++++++++++++--- .../esql/inference/InferenceService.java | 7 +- .../xpack/esql/session/EsqlSession.java | 2 +- .../inference/InferenceResolverTests.java | 39 +++++++++-- 4 files changed, 96 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceResolver.java index abb4eef251374..cb6fdefec7c10 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceResolver.java @@ -16,6 +16,11 @@ import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; +import org.elasticsearch.xpack.esql.expression.function.FunctionDefinition; +import org.elasticsearch.xpack.esql.expression.function.UnresolvedFunction; +import org.elasticsearch.xpack.esql.expression.function.inference.InferenceFunction; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.inference.InferencePlan; @@ -30,7 +35,7 @@ public class InferenceResolver { private final Client client; - + private final EsqlFunctionRegistry functionRegistry; private final ThreadPool threadPool; /** @@ -38,8 +43,9 @@ public class InferenceResolver { * * @param client The Elasticsearch client for executing inference deployment lookups */ - public InferenceResolver(Client client, ThreadPool threadPool) { + public InferenceResolver(Client client, EsqlFunctionRegistry functionRegistry, ThreadPool threadPool) { this.client = client; + this.functionRegistry = functionRegistry; this.threadPool = threadPool; } @@ -56,9 +62,8 @@ public InferenceResolver(Client client, ThreadPool threadPool) { * @param listener Callback to receive the resolution results */ public void resolveInferenceIds(LogicalPlan plan, ActionListener listener) { - List inferenceIds = new ArrayList<>(); - collectInferenceIds(plan, inferenceIds::add); - resolveInferenceIds(inferenceIds, listener); + + resolveInferenceIds(collectInferenceIds(plan), listener); } /** @@ -68,13 +73,17 @@ public void resolveInferenceIds(LogicalPlan plan, ActionListener *
  • {@link InferencePlan} objects (Completion, etc.)
  • + *
  • {@link InferenceFunction} objects (TextEmbedding, etc.)
  • * * * @param plan The logical plan to scan for inference operations - * @param c Consumer function to receive each discovered inference ID */ - void collectInferenceIds(LogicalPlan plan, Consumer c) { - collectInferenceIdsFromInferencePlans(plan, c); + List collectInferenceIds(LogicalPlan plan) { + List inferenceIds = new ArrayList<>(); + collectInferenceIdsFromInferencePlans(plan, inferenceIds::add); + collectInferenceIdsFromInferenceFunctions(plan, inferenceIds::add); + + return inferenceIds; } /** @@ -134,6 +143,28 @@ private void collectInferenceIdsFromInferencePlans(LogicalPlan plan, Consumer c.accept(inferenceId(inferencePlan))); } + /** + * Collects inference IDs from function expressions within the logical plan. + * + * @param plan The logical plan to scan for function expressions + * @param c Consumer function to receive each discovered inference ID + */ + private void collectInferenceIdsFromInferenceFunctions(LogicalPlan plan, Consumer c) { + EsqlFunctionRegistry snapshotRegistry = functionRegistry.snapshotRegistry(); + plan.forEachExpressionUp(UnresolvedFunction.class, f -> { + String functionName = snapshotRegistry.resolveAlias(f.name()); + if (snapshotRegistry.functionExists(functionName)) { + FunctionDefinition def = snapshotRegistry.resolveFunction(functionName); + if (InferenceFunction.class.isAssignableFrom(def.clazz())) { + String inferenceId = inferenceId(f, def); + if (inferenceId != null) { + c.accept(inferenceId); + } + } + } + }); + } + /** * Extracts the inference ID from an InferencePlan object. * @@ -148,6 +179,23 @@ private static String inferenceId(Expression e) { return BytesRefs.toString(e.fold(FoldContext.small())); } + public String inferenceId(UnresolvedFunction f, FunctionDefinition def) { + EsqlFunctionRegistry.FunctionDescription functionDescription = EsqlFunctionRegistry.description(def); + + for (int i = 0; i < functionDescription.args().size(); i++) { + EsqlFunctionRegistry.ArgSignature arg = functionDescription.args().get(i); + + if (arg.name().equals(InferenceFunction.INFERENCE_ID_PARAMETER_NAME)) { + Expression argValue = f.arguments().get(i); + if (argValue != null && argValue.foldable() && DataType.isString(argValue.dataType())) { + return inferenceId(argValue); + } + } + } + + return null; + } + public static Factory factory(Client client) { return new Factory(client, client.threadPool()); } @@ -161,8 +209,8 @@ private Factory(Client client, ThreadPool threadPool) { this.threadPool = threadPool; } - public InferenceResolver create() { - return new InferenceResolver(client, threadPool); + public InferenceResolver create(EsqlFunctionRegistry functionRegistry) { + return new InferenceResolver(client, functionRegistry, threadPool); } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceService.java index 37c163beaecda..630477a20f447 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceService.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.inference; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.inference.bulk.BulkInferenceRunner; import org.elasticsearch.xpack.esql.inference.bulk.BulkInferenceRunnerConfig; @@ -33,10 +34,12 @@ private InferenceService(InferenceResolver.Factory inferenceResolverFactory, Bul /** * Creates an inference resolver for resolving inference IDs in logical plans. * + * @param functionRegistry the function registry to resolve functions + * * @return a new inference resolver instance */ - public InferenceResolver inferenceResolver() { - return inferenceResolverFactory.create(); + public InferenceResolver inferenceResolver(EsqlFunctionRegistry functionRegistry) { + return inferenceResolverFactory.create(functionRegistry); } public BulkInferenceRunner bulkInferenceRunner() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 1d3b710baac35..4d47df6789e8c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -754,7 +754,7 @@ private void analyzeWithRetry( } private void resolveInferences(LogicalPlan plan, PreAnalysisResult preAnalysisResult, ActionListener l) { - inferenceService.inferenceResolver().resolveInferenceIds(plan, l.map(preAnalysisResult::withInferenceResolution)); + inferenceService.inferenceResolver(functionRegistry).resolveInferenceIds(plan, l.map(preAnalysisResult::withInferenceResolution)); } private PhysicalPlan logicalPlanToPhysicalPlan(LogicalPlan optimizedPlan, EsqlQueryRequest request) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/inference/InferenceResolverTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/inference/InferenceResolverTests.java index 39917f849e6f2..470c9b8e3ed21 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/inference/InferenceResolverTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/inference/InferenceResolverTests.java @@ -23,13 +23,13 @@ import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.junit.After; import org.junit.Before; -import java.util.HashSet; import java.util.List; -import java.util.Set; import static org.elasticsearch.xpack.esql.EsqlTestUtils.configuration; import static org.hamcrest.Matchers.contains; @@ -44,6 +44,7 @@ public class InferenceResolverTests extends ESTestCase { private TestThreadPool threadPool; + private EsqlFunctionRegistry functionRegistry; @Before public void setThreadPool() { @@ -60,6 +61,11 @@ public void setThreadPool() { ); } + @Before + public void setUpFunctionRegistry() { + functionRegistry = new EsqlFunctionRegistry(); + } + @After public void shutdownThreadPool() { terminate(threadPool); @@ -78,6 +84,26 @@ public void testCollectInferenceIds() { List.of("completion-inference-id") ); + if (EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()) { + // Text embedding inference plan + assertCollectInferenceIds( + "FROM books METADATA _score | EVAL embedding = TEXT_EMBEDDING(\"description\", \"text-embedding-inference-id\")", + List.of("text-embedding-inference-id") + ); + + // Test inference ID collection from an inference function + assertCollectInferenceIds( + "FROM books METADATA _score | EVAL embedding = TEXT_EMBEDDING(\"description\", \"text-embedding-inference-id\")", + List.of("text-embedding-inference-id") + ); + } + + // Test inference ID collection with nested functions + assertCollectInferenceIds( + "FROM books METADATA _score | EVAL embedding = TEXT_EMBEDDING(TEXT_EMBEDDING(\"nested\", \"nested-id\"), \"outer-id\")", + List.of("nested-id", "outer-id") + ); + // Multiple inference plans assertCollectInferenceIds(""" FROM books METADATA _score @@ -90,9 +116,8 @@ public void testCollectInferenceIds() { } private void assertCollectInferenceIds(String query, List expectedInferenceIds) { - Set inferenceIds = new HashSet<>(); InferenceResolver inferenceResolver = inferenceResolver(); - inferenceResolver.collectInferenceIds(new EsqlParser().createStatement(query, configuration(query)), inferenceIds::add); + List inferenceIds = inferenceResolver.collectInferenceIds(new EsqlParser().createStatement(query, configuration(query))); assertThat(inferenceIds, containsInAnyOrder(expectedInferenceIds.toArray(new String[0]))); } @@ -145,7 +170,7 @@ public void testResolveMultipleInferenceIds() throws Exception { public void testResolveMissingInferenceIds() throws Exception { InferenceResolver inferenceResolver = inferenceResolver(); - List inferenceIds = List.of("missing-plan"); + List inferenceIds = List.of("missing-inference-id"); SetOnce inferenceResolutionSetOnce = new SetOnce<>(); @@ -162,7 +187,7 @@ public void testResolveMissingInferenceIds() throws Exception { assertThat(inferenceResolution.resolvedInferences(), empty()); assertThat(inferenceResolution.hasError(), equalTo(true)); - assertThat(inferenceResolution.getError("missing-plan"), equalTo("inference endpoint not found")); + assertThat(inferenceResolution.getError("missing-inference-id"), equalTo("inference endpoint not found")); }); } @@ -210,7 +235,7 @@ private static ActionResponse getInferenceModelResponse(GetInferenceModelAction. } private InferenceResolver inferenceResolver() { - return new InferenceResolver(mockClient(), threadPool); + return new InferenceResolver(mockClient(), functionRegistry, threadPool); } private static ModelConfigurations mockModelConfig(String inferenceId, TaskType taskType) { From aadf8807114b921531ec9f62b2afede928207610 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 10:25:47 +0200 Subject: [PATCH 08/26] Analyzer now resolve inference endpoints for inference function. --- .../xpack/esql/analysis/Analyzer.java | 34 +++++- .../esql/analysis/AnalyzerTestUtils.java | 33 +++++- .../xpack/esql/analysis/AnalyzerTests.java | 109 ++++++++++++++++++ .../xpack/esql/analysis/VerifierTests.java | 48 ++++++++ 4 files changed, 220 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index f7bd49b75b4c5..a02a5565c46d0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -73,6 +73,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.SummationMode; import org.elasticsearch.xpack.esql.expression.function.aggregate.Values; import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; +import org.elasticsearch.xpack.esql.expression.function.inference.InferenceFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest; @@ -1414,7 +1415,8 @@ private static class ResolveInference extends ParameterizedRule resolveInferencePlan(p, context)); + return plan.transformDown(InferencePlan.class, p -> resolveInferencePlan(p, context)) + .transformExpressionsOnly(InferenceFunction.class, f -> resolveInferenceFunction(f, context)); } private LogicalPlan resolveInferencePlan(InferencePlan plan, AnalyzerContext context) { @@ -1443,6 +1445,36 @@ private LogicalPlan resolveInferencePlan(InferencePlan plan, AnalyzerContext return plan; } + + private InferenceFunction resolveInferenceFunction(InferenceFunction inferenceFunction, AnalyzerContext context) { + if (inferenceFunction.inferenceId().resolved() + && inferenceFunction.inferenceId().foldable() + && DataType.isString(inferenceFunction.inferenceId().dataType())) { + + String inferenceId = BytesRefs.toString(inferenceFunction.inferenceId().fold(FoldContext.small())); + ResolvedInference resolvedInference = context.inferenceResolution().getResolvedInference(inferenceId); + + if (resolvedInference == null) { + String error = context.inferenceResolution().getError(inferenceId); + return inferenceFunction.withInferenceResolutionError(inferenceId, error); + } + + if (resolvedInference.taskType() != inferenceFunction.taskType()) { + String error = "cannot use inference endpoint [" + + inferenceId + + "] with task type [" + + resolvedInference.taskType() + + "] within a " + + context.functionRegistry().snapshotRegistry().functionName(inferenceFunction.getClass()) + + " function. Only inference endpoints with the task type [" + + inferenceFunction.taskType() + + "] are supported."; + return inferenceFunction.withInferenceResolutionError(inferenceId, error); + } + } + + return inferenceFunction; + } } private static class AddImplicitLimit extends ParameterizedRule { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java index fbfa18dccc477..78a8ca5483246 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java @@ -9,6 +9,7 @@ import org.elasticsearch.index.IndexMode; import org.elasticsearch.inference.TaskType; +import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.core.type.EsField; @@ -26,6 +27,7 @@ import org.elasticsearch.xpack.esql.session.Configuration; import java.util.ArrayList; +import java.util.Arrays; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -195,14 +197,39 @@ public static EnrichResolution defaultEnrichResolution() { return enrichResolution; } + public static final String RERANKING_INFERENCE_ID = "reranking-inference-id"; + public static final String COMPLETION_INFERENCE_ID = "completion-inference-id"; + public static final String TEXT_EMBEDDING_INFERENCE_ID = "text-embedding-inference-id"; + public static final String CHAT_COMPLETION_INFERENCE_ID = "chat-completion-inference-id"; + public static final String SPARSE_EMBEDDING_INFERENCE_ID = "sparse-embedding-inference-id"; + public static final List VALID_INFERENCE_IDS = List.of( + RERANKING_INFERENCE_ID, + COMPLETION_INFERENCE_ID, + TEXT_EMBEDDING_INFERENCE_ID, + CHAT_COMPLETION_INFERENCE_ID, + SPARSE_EMBEDDING_INFERENCE_ID + ); + public static final String ERROR_INFERENCE_ID = "error-inference-id"; + public static InferenceResolution defaultInferenceResolution() { return InferenceResolution.builder() - .withResolvedInference(new ResolvedInference("reranking-inference-id", TaskType.RERANK)) - .withResolvedInference(new ResolvedInference("completion-inference-id", TaskType.COMPLETION)) - .withError("error-inference-id", "error with inference resolution") + .withResolvedInference(new ResolvedInference(RERANKING_INFERENCE_ID, TaskType.RERANK)) + .withResolvedInference(new ResolvedInference(COMPLETION_INFERENCE_ID, TaskType.COMPLETION)) + .withResolvedInference(new ResolvedInference(TEXT_EMBEDDING_INFERENCE_ID, TaskType.TEXT_EMBEDDING)) + .withResolvedInference(new ResolvedInference(CHAT_COMPLETION_INFERENCE_ID, TaskType.CHAT_COMPLETION)) + .withResolvedInference(new ResolvedInference(SPARSE_EMBEDDING_INFERENCE_ID, TaskType.SPARSE_EMBEDDING)) + .withError(ERROR_INFERENCE_ID, "error with inference resolution") .build(); } + public static String randomInferenceId() { + return ESTestCase.randomFrom(VALID_INFERENCE_IDS); + } + + public static String randomInferenceId(String... excludes) { + return ESTestCase.randomValueOtherThanMany(Arrays.asList(excludes)::contains, AnalyzerTestUtils::randomInferenceId); + } + public static void loadEnrichPolicyResolution( EnrichResolution enrich, String policyType, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 662e1bdabcab2..79aa821ce4040 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -56,6 +56,7 @@ import org.elasticsearch.xpack.esql.expression.function.fulltext.QueryString; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; import org.elasticsearch.xpack.esql.expression.function.grouping.TBucket; +import org.elasticsearch.xpack.esql.expression.function.inference.TextEmbedding; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDateNanos; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDenseVector; @@ -123,6 +124,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.referenceAttribute; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.elasticsearch.xpack.esql.analysis.Analyzer.NO_FIELDS; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.TEXT_EMBEDDING_INFERENCE_ID; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyze; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyzer; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyzerDefaultMapping; @@ -130,6 +132,7 @@ import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultInferenceResolution; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.indexWithDateDateNanosUnionType; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomInferenceId; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution; import static org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY; @@ -3765,6 +3768,112 @@ private void assertEmptyEsRelation(LogicalPlan plan) { assertThat(esRelation.output(), equalTo(NO_FIELDS)); } + public void testTextEmbeddingResolveInferenceId() { + assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + + LogicalPlan plan = analyze( + String.format(Locale.ROOT, """ + FROM books METADATA _score | EVAL embedding = TEXT_EMBEDDING("italian food recipe", "%s")""", TEXT_EMBEDDING_INFERENCE_ID), + "mapping-books.json" + ); + + Eval eval = as(as(plan, Limit.class).child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + Alias alias = as(eval.fields().get(0), Alias.class); + assertThat(alias.name(), equalTo("embedding")); + TextEmbedding function = as(alias.child(), TextEmbedding.class); + + assertThat(function.inputText(), equalTo(string("italian food recipe"))); + assertThat(function.inferenceId(), equalTo(string(TEXT_EMBEDDING_INFERENCE_ID))); + } + + public void testTextEmbeddingFunctionResolveType() { + assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + + LogicalPlan plan = analyze( + String.format(Locale.ROOT, """ + FROM books METADATA _score| EVAL embedding = TEXT_EMBEDDING("italian food recipe", "%s")""", TEXT_EMBEDDING_INFERENCE_ID), + "mapping-books.json" + ); + + Eval eval = as(as(plan, Limit.class).child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + Alias alias = as(eval.fields().get(0), Alias.class); + assertThat(alias.name(), equalTo("embedding")); + + TextEmbedding function = as(alias.child(), TextEmbedding.class); + + assertThat(function.foldable(), equalTo(true)); + assertThat(function.dataType(), equalTo(DENSE_VECTOR)); + } + + public void testTextEmbeddingFunctionMissingInferenceIdError() { + assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + + VerificationException ve = expectThrows( + VerificationException.class, + () -> analyze( + String.format(Locale.ROOT, """ + FROM books METADATA _score| EVAL embedding = TEXT_EMBEDDING("italian food recipe", "%s")""", "unknow-inference-id"), + "mapping-books.json" + ) + ); + + assertThat(ve.getMessage(), containsString("unresolved inference [unknow-inference-id]")); + } + + public void testTextEmbeddingFunctionInvalidInferenceIdError() { + assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + + String inferenceId = randomInferenceId(TEXT_EMBEDDING_INFERENCE_ID); + VerificationException ve = expectThrows( + VerificationException.class, + () -> analyze( + String.format(Locale.ROOT, """ + FROM books METADATA _score| EVAL embedding = TEXT_EMBEDDING("italian food recipe", "%s")""", inferenceId), + "mapping-books.json" + ) + ); + + assertThat( + ve.getMessage(), + containsString(String.format(Locale.ROOT, "cannot use inference endpoint [%s] with task type", inferenceId)) + ); + } + + public void testTextEmbeddingFunctionWithoutModel() { + assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + + ParsingException ve = expectThrows(ParsingException.class, () -> analyze(""" + FROM books METADATA _score| EVAL embedding = TEXT_EMBEDDING("italian food recipe")""", "mapping-books.json")); + + assertThat( + ve.getMessage(), + containsString(" error building [text_embedding]: function [text_embedding] expects exactly two arguments") + ); + } + + public void testKnnFunctionWithTextEmbedding() { + assumeTrue("KNN function capability required", EsqlCapabilities.Cap.KNN_FUNCTION_V5.isEnabled()); + assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + + LogicalPlan plan = analyze( + String.format(Locale.ROOT, """ + from test | where KNN(float_vector, TEXT_EMBEDDING("italian food recipe", "%s"))""", TEXT_EMBEDDING_INFERENCE_ID), + "mapping-dense_vector.json" + ); + + Limit limit = as(plan, Limit.class); + Filter filter = as(limit.child(), Filter.class); + Knn knn = as(filter.condition(), Knn.class); + assertThat(knn.field(), instanceOf(FieldAttribute.class)); + assertThat(((FieldAttribute) knn.field()).name(), equalTo("float_vector")); + + TextEmbedding textEmbedding = as(knn.query(), TextEmbedding.class); + assertThat(textEmbedding.inputText(), equalTo(string("italian food recipe"))); + assertThat(textEmbedding.inferenceId(), equalTo(string(TEXT_EMBEDDING_INFERENCE_ID))); + } + public void testResolveRerankInferenceId() { { LogicalPlan plan = analyze(""" diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index ba2ef6d152e45..66c8b2d6e463c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -41,6 +41,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_CFG; import static org.elasticsearch.xpack.esql.EsqlTestUtils.paramAsConstant; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.TEXT_EMBEDDING_INFERENCE_ID; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; @@ -2675,6 +2676,53 @@ public void testSortInTimeSeries() { and the first aggregation [STATS avg(network.connections)] is not allowed""")); } + public void testTextEmbeddingFunctionInvalidQuery() { + assertThat( + error("from test | EVAL embedding = TEXT_EMBEDDING(null, ?)", defaultAnalyzer, TEXT_EMBEDDING_INFERENCE_ID), + equalTo("1:30: first argument of [TEXT_EMBEDDING(null, ?)] cannot be null, received [null]") + ); + + assertThat( + error("from test | EVAL embedding = TEXT_EMBEDDING(42, ?)", defaultAnalyzer, TEXT_EMBEDDING_INFERENCE_ID), + equalTo("1:30: first argument of [TEXT_EMBEDDING(42, ?)] must be [string], found value [42] type [integer]") + ); + + assertThat( + error("from test | EVAL embedding = TEXT_EMBEDDING(last_name, ?)", defaultAnalyzer, TEXT_EMBEDDING_INFERENCE_ID), + equalTo("1:30: first argument of [TEXT_EMBEDDING(last_name, ?)] must be a constant, received [last_name]") + ); + } + + public void testTextEmbeddingFunctionInvalidInferenceId() { + assertThat( + error("from test | EVAL embedding = TEXT_EMBEDDING(?, null)", defaultAnalyzer, "query text"), + equalTo("1:30: second argument of [TEXT_EMBEDDING(?, null)] cannot be null, received [null]") + ); + + assertThat( + error("from test | EVAL embedding = TEXT_EMBEDDING(?, 42)", defaultAnalyzer, "query text"), + equalTo("1:30: second argument of [TEXT_EMBEDDING(?, 42)] must be [string], found value [42] type [integer]") + ); + + assertThat( + error("from test | EVAL embedding = TEXT_EMBEDDING(?, last_name)", defaultAnalyzer, "query text"), + equalTo("1:30: second argument of [TEXT_EMBEDDING(?, last_name)] must be a constant, received [last_name]") + ); + } + + // public void testTextEmbeddingFunctionInvalidInferenceId() { + // assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); + // + // ParsingException ve = expectThrows(ParsingException.class, () -> analyze(""" + // FROM books METADATA _score| EVAL embedding = TEXT_EMBEDDING("italian food recipe", CONCAT("machin", title))""", + // "mapping-books.json")); + // + // assertThat( + // ve.getMessage(), + // containsString(" error building [text_embedding]: function [text_embedding] expects exactly two arguments") + // ); + // } + private void checkVectorFunctionsNullArgs(String functionInvocation) throws Exception { query("from test | eval similarity = " + functionInvocation, fullTextAnalyzer); } From 6fb48b0cd79b5338868842050acc7bc5e82a2e46 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 19 Sep 2025 08:34:29 +0000 Subject: [PATCH 09/26] [CI] Auto commit changes from spotless --- .../xpack/esql/expression/function/EsqlFunctionRegistry.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index bf58f3750841f..74da67d069787 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -543,7 +543,7 @@ private static FunctionDefinition[][] snapshotFunctions() { def(Hamming.class, Hamming::new, "v_hamming"), def(UrlEncode.class, UrlEncode::new, "url_encode"), def(UrlDecode.class, UrlDecode::new, "url_decode"), - def(TextEmbedding.class, bi(TextEmbedding::new), "text_embedding")}}; + def(TextEmbedding.class, bi(TextEmbedding::new), "text_embedding") } }; } public EsqlFunctionRegistry snapshotRegistry() { From 2c423fb46885c2bc7ec3820722659902c2f968bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20FOUCRET?= Date: Fri, 19 Sep 2025 14:14:58 +0200 Subject: [PATCH 10/26] Apply suggestions from code review Co-authored-by: Carlos Delgado <6339205+carlosdelest@users.noreply.github.com> --- .../function/inference/TextEmbedding.java | 2 +- .../xpack/esql/analysis/VerifierTests.java | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java index a5ef509df1dff..69c48d9c5e082 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java @@ -54,7 +54,7 @@ public class TextEmbedding extends InferenceFunction { ) public TextEmbedding( Source source, - @Param(name = "text", type = { "keyword", "text" }, description = "Text to embed") Expression inputText, + @Param(name = "text", type = { "keyword", "text" }, description = "Text to generate embeddings from") Expression inputText, @Param( name = InferenceFunction.INFERENCE_ID_PARAMETER_NAME, type = { "keyword", "text" }, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 66c8b2d6e463c..d5dc93a570b37 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -2677,6 +2677,7 @@ public void testSortInTimeSeries() { } public void testTextEmbeddingFunctionInvalidQuery() { + assumeTrue("TEXT_EMBEDDING is not enabled", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); assertThat( error("from test | EVAL embedding = TEXT_EMBEDDING(null, ?)", defaultAnalyzer, TEXT_EMBEDDING_INFERENCE_ID), equalTo("1:30: first argument of [TEXT_EMBEDDING(null, ?)] cannot be null, received [null]") @@ -2694,6 +2695,7 @@ public void testTextEmbeddingFunctionInvalidQuery() { } public void testTextEmbeddingFunctionInvalidInferenceId() { + assumeTrue("TEXT_EMBEDDING is not enabled", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); assertThat( error("from test | EVAL embedding = TEXT_EMBEDDING(?, null)", defaultAnalyzer, "query text"), equalTo("1:30: second argument of [TEXT_EMBEDDING(?, null)] cannot be null, received [null]") @@ -2710,18 +2712,6 @@ public void testTextEmbeddingFunctionInvalidInferenceId() { ); } - // public void testTextEmbeddingFunctionInvalidInferenceId() { - // assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); - // - // ParsingException ve = expectThrows(ParsingException.class, () -> analyze(""" - // FROM books METADATA _score| EVAL embedding = TEXT_EMBEDDING("italian food recipe", CONCAT("machin", title))""", - // "mapping-books.json")); - // - // assertThat( - // ve.getMessage(), - // containsString(" error building [text_embedding]: function [text_embedding] expects exactly two arguments") - // ); - // } private void checkVectorFunctionsNullArgs(String functionInvocation) throws Exception { query("from test | eval similarity = " + functionInvocation, fullTextAnalyzer); From 9406d37f7b61e657f584054fe6b7c4f1ef4c71c3 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 14:21:23 +0200 Subject: [PATCH 11/26] Apply suggestion from review. --- .../elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java | 2 +- .../org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java index 78a8ca5483246..6f6c76efaf08e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTestUtils.java @@ -226,7 +226,7 @@ public static String randomInferenceId() { return ESTestCase.randomFrom(VALID_INFERENCE_IDS); } - public static String randomInferenceId(String... excludes) { + public static String randomInferenceIdOtherThan(String... excludes) { return ESTestCase.randomValueOtherThanMany(Arrays.asList(excludes)::contains, AnalyzerTestUtils::randomInferenceId); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 79aa821ce4040..b2f5e424bd165 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -132,7 +132,7 @@ import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultInferenceResolution; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.indexWithDateDateNanosUnionType; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping; -import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomInferenceId; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomInferenceIdOtherThan; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution; import static org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY; @@ -3825,7 +3825,7 @@ public void testTextEmbeddingFunctionMissingInferenceIdError() { public void testTextEmbeddingFunctionInvalidInferenceIdError() { assumeTrue("TEXT_EMBEDDING function required", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); - String inferenceId = randomInferenceId(TEXT_EMBEDDING_INFERENCE_ID); + String inferenceId = randomInferenceIdOtherThan(TEXT_EMBEDDING_INFERENCE_ID); VerificationException ve = expectThrows( VerificationException.class, () -> analyze( From 774986fb33b02a24622d6a7ff6cbc3d1f44fef92 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 14:24:18 +0200 Subject: [PATCH 12/26] TextEmbedding accepts only keyword parameters. --- .../esql/expression/function/inference/TextEmbedding.java | 4 ++-- .../org/elasticsearch/xpack/esql/analysis/VerifierTests.java | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java index 69c48d9c5e082..6a97f2b4917a5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java @@ -54,10 +54,10 @@ public class TextEmbedding extends InferenceFunction { ) public TextEmbedding( Source source, - @Param(name = "text", type = { "keyword", "text" }, description = "Text to generate embeddings from") Expression inputText, + @Param(name = "text", type = { "keyword" }, description = "Text to generate embeddings from") Expression inputText, @Param( name = InferenceFunction.INFERENCE_ID_PARAMETER_NAME, - type = { "keyword", "text" }, + type = { "keyword" }, description = "Identifier of the inference endpoint" ) Expression inferenceId ) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index d5dc93a570b37..370ef4721df9f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -2712,7 +2712,6 @@ public void testTextEmbeddingFunctionInvalidInferenceId() { ); } - private void checkVectorFunctionsNullArgs(String functionInvocation) throws Exception { query("from test | eval similarity = " + functionInvocation, fullTextAnalyzer); } From 8d4a832b1d11a036270b83a56200bdeed1ce7e47 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 15:05:24 +0200 Subject: [PATCH 13/26] Update TextEmbeddingTests supported parameters data types. --- .../inference/TextEmbeddingTests.java | 38 ++++++++----------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java index b6fdc7addf984..7b4d3b171d4cd 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java @@ -20,7 +20,6 @@ import org.hamcrest.Matchers; import org.junit.Before; -import java.util.ArrayList; import java.util.List; import java.util.function.Supplier; @@ -40,29 +39,22 @@ public TextEmbeddingTests(@Name("TestCase") Supplier @ParametersFactory public static Iterable parameters() { - List suppliers = new ArrayList<>(); - - // Test all string type combinations for text input and inference endpoint ID - for (DataType inputTextDataType : DataType.stringTypes()) { - for (DataType inferenceIdDataType : DataType.stringTypes()) { - suppliers.add( - new TestCaseSupplier( - List.of(inputTextDataType, inferenceIdDataType), - () -> new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(randomBytesReference(10).toBytesRef(), inputTextDataType, "text"), - new TestCaseSupplier.TypedData(randomBytesReference(10).toBytesRef(), inferenceIdDataType, "inference_id") - ), - Matchers.blankOrNullString(), - DENSE_VECTOR, - equalTo(true) - ) + return parameterSuppliersFromTypedData( + List.of( + new TestCaseSupplier( + List.of(DataType.KEYWORD, DataType.KEYWORD), + () -> new TestCaseSupplier.TestCase( + List.of( + new TestCaseSupplier.TypedData(randomBytesReference(10).toBytesRef(), DataType.KEYWORD, "text"), + new TestCaseSupplier.TypedData(randomBytesReference(10).toBytesRef(), DataType.KEYWORD, "inference_id") + ), + Matchers.blankOrNullString(), + DENSE_VECTOR, + equalTo(true) ) - ); - } - } - - return parameterSuppliersFromTypedData(suppliers); + ) + ) + ); } @Override From 71bae69a2a4a4363281d472bcedad1544b690039 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 16:00:52 +0200 Subject: [PATCH 14/26] Fix text embedding type validation. --- .../expression/function/inference/TextEmbedding.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java index 6a97f2b4917a5..93526016a0ef9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java @@ -30,7 +30,7 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isFoldable; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; -import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; /** * TEXT_EMBEDDING function converts text to dense vector embeddings using an inference endpoint. @@ -108,14 +108,15 @@ protected TypeResolution resolveType() { } TypeResolution textResolution = isNotNull(inputText, sourceText(), FIRST).and(isFoldable(inputText, sourceText(), FIRST)) - .and(isString(inputText, sourceText(), FIRST)); + .and(isType(inputText, DataType.KEYWORD::equals, sourceText(), FIRST, "string")); if (textResolution.unresolved()) { return textResolution; } - TypeResolution inferenceIdResolution = isNotNull(inferenceId, sourceText(), SECOND).and(isString(inferenceId, sourceText(), SECOND)) - .and(isFoldable(inferenceId, sourceText(), SECOND)); + TypeResolution inferenceIdResolution = isNotNull(inferenceId, sourceText(), SECOND).and( + isType(inferenceId, DataType.KEYWORD::equals, sourceText(), SECOND, "string") + ).and(isFoldable(inferenceId, sourceText(), SECOND)); if (inferenceIdResolution.unresolved()) { return inferenceIdResolution; From 1275582f0fdf613fe0a4250eaef88bae756f6816 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 21:08:25 +0200 Subject: [PATCH 15/26] Add a dummy example (waiting for real CSV tests to be implemented) --- .../src/main/resources/text-embedding.csv-spec | 14 ++++++++++++++ .../function/inference/TextEmbedding.java | 9 ++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec new file mode 100644 index 0000000000000..3ccc1b34b7eab --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec @@ -0,0 +1,14 @@ +placeholder +required_capability: not_existing_capability + +// tag::embedding-eval[] +ROW input="Who is Victor Hugo?" +| EVAL embedding = TEXT_EMBEDDING("Who is Victor Hugo?", "test_dense_inference") +; +// end::embedding-eval[] + + +input:keyword | embedding:dense_vector +Who is Victor Hugo? | [56.0, 50.0, 48.0] +; + diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java index 93526016a0ef9..ff573734d0444 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; @@ -50,7 +51,13 @@ public class TextEmbedding extends InferenceFunction { returnType = "dense_vector", description = "Generates dense vector embeddings for text using a specified inference endpoint.", appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) }, - preview = true + preview = true, + examples = { + @Example( + description = "Generate text embeddings using the 'test_dense_inference' inference endpoint.", + file = "text-embedding", + tag = "embedding-eval" + ) } ) public TextEmbedding( Source source, From 89dfcecc84913bfa16cd1d095c083615c5567aee Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 21:08:28 +0200 Subject: [PATCH 16/26] Add a dummy example (waiting for real CSV tests to be implemented) --- .../esql/kibana/definition/functions/text_embedding.json | 3 +++ .../esql/kibana/docs/functions/text_embedding.md | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json b/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json index 9e4967b92c367..5f1f68a2b14bd 100644 --- a/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json +++ b/docs/reference/query-languages/esql/kibana/definition/functions/text_embedding.json @@ -4,6 +4,9 @@ "name" : "text_embedding", "description" : "Generates dense vector embeddings for text using a specified inference endpoint.", "signatures" : [ ], + "examples" : [ + "ROW input=\"Who is Victor Hugo?\"\n| EVAL embedding = TEXT_EMBEDDING(\"Who is Victor Hugo?\", \"test_dense_inference\")\n;" + ], "preview" : true, "snapshot_only" : true } diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/text_embedding.md b/docs/reference/query-languages/esql/kibana/docs/functions/text_embedding.md index bb3e74fc116cd..f8981fb3be66a 100644 --- a/docs/reference/query-languages/esql/kibana/docs/functions/text_embedding.md +++ b/docs/reference/query-languages/esql/kibana/docs/functions/text_embedding.md @@ -2,3 +2,9 @@ ### TEXT EMBEDDING Generates dense vector embeddings for text using a specified inference endpoint. + +```esql +ROW input="Who is Victor Hugo?" +| EVAL embedding = TEXT_EMBEDDING("Who is Victor Hugo?", "test_dense_inference") +; +``` From 39919fa6e7510dea900d5d4e386aeda798b3d70c Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 21:10:19 +0200 Subject: [PATCH 17/26] Fix breaking release tests. --- .../xpack/esql/inference/InferenceResolverTests.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/inference/InferenceResolverTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/inference/InferenceResolverTests.java index 470c9b8e3ed21..f14b053134fed 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/inference/InferenceResolverTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/inference/InferenceResolverTests.java @@ -96,13 +96,13 @@ public void testCollectInferenceIds() { "FROM books METADATA _score | EVAL embedding = TEXT_EMBEDDING(\"description\", \"text-embedding-inference-id\")", List.of("text-embedding-inference-id") ); - } - // Test inference ID collection with nested functions - assertCollectInferenceIds( - "FROM books METADATA _score | EVAL embedding = TEXT_EMBEDDING(TEXT_EMBEDDING(\"nested\", \"nested-id\"), \"outer-id\")", - List.of("nested-id", "outer-id") - ); + // Test inference ID collection with nested functions + assertCollectInferenceIds( + "FROM books METADATA _score | EVAL embedding = TEXT_EMBEDDING(TEXT_EMBEDDING(\"nested\", \"nested-id\"), \"outer-id\")", + List.of("nested-id", "outer-id") + ); + } // Multiple inference plans assertCollectInferenceIds(""" From b7e821d367650a4bbe65bda2ad16fd621a4d7c65 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 21:28:17 +0200 Subject: [PATCH 18/26] Made the code more readable. --- .../esql/inference/InferenceResolver.java | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceResolver.java index cb6fdefec7c10..175e9252f6055 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/inference/InferenceResolver.java @@ -172,23 +172,26 @@ private void collectInferenceIdsFromInferenceFunctions(LogicalPlan plan, Consume * @return The inference ID as a string */ private static String inferenceId(InferencePlan plan) { - return inferenceId(plan.inferenceId()); + return BytesRefs.toString(plan.inferenceId().fold(FoldContext.small())); } - private static String inferenceId(Expression e) { - return BytesRefs.toString(e.fold(FoldContext.small())); - } - - public String inferenceId(UnresolvedFunction f, FunctionDefinition def) { + /** + * Extracts the inference ID from an InferenceFunction expression that is not yet resolved. + * + * @param f The UnresolvedFunction expression representing the inference function + * @param def The FunctionDefinition of the inference function + * @return The inference ID as a string, or null if not found or invalid + */ + private static String inferenceId(UnresolvedFunction f, FunctionDefinition def) { EsqlFunctionRegistry.FunctionDescription functionDescription = EsqlFunctionRegistry.description(def); for (int i = 0; i < functionDescription.args().size(); i++) { EsqlFunctionRegistry.ArgSignature arg = functionDescription.args().get(i); if (arg.name().equals(InferenceFunction.INFERENCE_ID_PARAMETER_NAME)) { - Expression argValue = f.arguments().get(i); - if (argValue != null && argValue.foldable() && DataType.isString(argValue.dataType())) { - return inferenceId(argValue); + Expression inferenceId = f.arguments().get(i); + if (inferenceId != null && inferenceId.foldable() && DataType.isString(inferenceId.dataType())) { + return BytesRefs.toString(inferenceId.fold(FoldContext.small())); } } } From 97ecf8093f694567b7534530c031d1bd3f5d2fee Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 23:44:49 +0200 Subject: [PATCH 19/26] Filter out TEXT_EMBEDDING FROM CSV TESTS --- .../src/test/java/org/elasticsearch/xpack/esql/CsvTests.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index d445cf443dc06..1d73396017170 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -331,6 +331,10 @@ public final void test() throws Throwable { "CSV tests cannot currently handle FORK", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.FORK_V9.capabilityName()) ); + assumeFalse( + "CSV tests cannot currently handle TEXT_EMBEDDING function", + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.capabilityName()) + ); assumeFalse( "CSV tests cannot currently handle multi_match function that depends on Lucene", testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.MULTI_MATCH_FUNCTION.capabilityName()) From d5cf81ca6765357850d3cb8f3d0541e6dfa56963 Mon Sep 17 00:00:00 2001 From: afoucret Date: Fri, 19 Sep 2025 23:53:52 +0200 Subject: [PATCH 20/26] Fixing CSV tests --- .../_snippets/functions/parameters/text_embedding.md | 10 ++++++++++ .../src/main/resources/text-embedding.csv-spec | 1 + 2 files changed, 11 insertions(+) create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/text_embedding.md diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/text_embedding.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/text_embedding.md new file mode 100644 index 0000000000000..e2b852912c5f5 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/text_embedding.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See + +**Parameters** + +`text` +: Text to generate embeddings from + +`inference_id` +: Identifier of the inference endpoint + diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec index 3ccc1b34b7eab..f026800598e10 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/text-embedding.csv-spec @@ -1,4 +1,5 @@ placeholder +required_capability: text_embedding_function required_capability: not_existing_capability // tag::embedding-eval[] From 0d01b5e57f2e7deadcd02f744b337b13891f0ea2 Mon Sep 17 00:00:00 2001 From: afoucret Date: Wed, 24 Sep 2025 10:19:05 +0200 Subject: [PATCH 21/26] Fix typo --- .../xpack/esql/expression/function/EsqlFunctionRegistry.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 6c3512911f9f5..0224edcb4cfe3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -543,7 +543,7 @@ private static FunctionDefinition[][] snapshotFunctions() { def(Magnitude.class, Magnitude::new, "v_magnitude"), def(Hamming.class, Hamming::new, "v_hamming"), def(UrlEncode.class, UrlEncode::new, "url_encode"), - def(UrlEncodeComponent.class, UrlEncodeComponent::new, "url_encode_component") + def(UrlEncodeComponent.class, UrlEncodeComponent::new, "url_encode_component"), def(UrlDecode.class, UrlDecode::new, "url_decode"), def(TextEmbedding.class, bi(TextEmbedding::new), "text_embedding") } }; } From e8ca515b43532ea340b675683b70e47b5d92b606 Mon Sep 17 00:00:00 2001 From: afoucret Date: Wed, 24 Sep 2025 11:08:16 +0200 Subject: [PATCH 22/26] Make TextEmbedding not serializable. --- .../esql/expression/ExpressionWritables.java | 6 --- .../inference/InferenceWritables.java | 36 --------------- .../function/inference/TextEmbedding.java | 25 ++-------- .../TextEmbeddingSerializationTests.java | 46 ------------------- .../inference/TextEmbeddingTests.java | 6 ++- 5 files changed, 8 insertions(+), 111 deletions(-) delete mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceWritables.java delete mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingSerializationTests.java diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java index 704a56b0ff6ba..c14e8168884a9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/ExpressionWritables.java @@ -13,7 +13,6 @@ import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateWritables; import org.elasticsearch.xpack.esql.expression.function.fulltext.FullTextWritables; -import org.elasticsearch.xpack.esql.expression.function.inference.InferenceWritables; import org.elasticsearch.xpack.esql.expression.function.scalar.ScalarFunctionWritables; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FromBase64; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble; @@ -122,7 +121,6 @@ public static List getNamedWriteables() { entries.addAll(fullText()); entries.addAll(unaryScalars()); entries.addAll(vector()); - entries.addAll(inference()); return entries; } @@ -269,8 +267,4 @@ private static List fullText() { private static List vector() { return VectorWritables.getNamedWritables(); } - - private static List inference() { - return InferenceWritables.getNamedWritables(); - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceWritables.java deleted file mode 100644 index 9809ef0d46b66..0000000000000 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/InferenceWritables.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.expression.function.inference; - -import org.elasticsearch.common.io.stream.NamedWriteableRegistry; -import org.elasticsearch.xpack.esql.action.EsqlCapabilities; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Defines the named writables for inference functions in ESQL. - */ -public final class InferenceWritables { - - private InferenceWritables() { - // Utility class - throw new UnsupportedOperationException(); - } - - public static List getNamedWritables() { - List entries = new ArrayList<>(); - - if (EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()) { - entries.add(TextEmbedding.ENTRY); - } - - return Collections.unmodifiableList(entries); - } -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java index ff573734d0444..0c7debcd2a58d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.esql.expression.function.inference; -import org.elasticsearch.common.io.stream.NamedWriteableRegistry; -import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -16,12 +14,7 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.function.Example; -import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; -import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; -import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; -import org.elasticsearch.xpack.esql.expression.function.Param; -import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.expression.function.*; import java.io.IOException; import java.util.List; @@ -38,12 +31,6 @@ */ public class TextEmbedding extends InferenceFunction { - public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( - Expression.class, - "TextEmbedding", - TextEmbedding::new - ); - private final Expression inferenceId; private final Expression inputText; @@ -73,20 +60,14 @@ public TextEmbedding( this.inputText = inputText; } - private TextEmbedding(StreamInput in) throws IOException { - this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class)); - } - @Override public void writeTo(StreamOutput out) throws IOException { - source().writeTo(out); - out.writeNamedWriteable(inputText); - out.writeNamedWriteable(inferenceId); + throw new UnsupportedOperationException("doesn't escape the node"); } @Override public String getWriteableName() { - return ENTRY.name; + throw new UnsupportedOperationException("doesn't escape the node"); } public Expression inputText() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingSerializationTests.java deleted file mode 100644 index 5d7e1dfa4301a..0000000000000 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingSerializationTests.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.expression.function.inference; - -import org.elasticsearch.xpack.esql.action.EsqlCapabilities; -import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.tree.Source; -import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests; -import org.junit.Before; - -import java.io.IOException; - -/** Tests serialization/deserialization of TEXT_EMBEDDING function instances. */ -public class TextEmbeddingSerializationTests extends AbstractExpressionSerializationTests { - - @Before - public void checkCapability() { - assumeTrue("TEXT_EMBEDDING is not enabled", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); - } - - @Override - protected TextEmbedding createTestInstance() { - Source source = randomSource(); - Expression inputText = randomChild(); - Expression inferenceId = randomChild(); - return new TextEmbedding(source, inputText, inferenceId); - } - - @Override - protected TextEmbedding mutateInstance(TextEmbedding instance) throws IOException { - Source source = instance.source(); - Expression inputText = instance.inputText(); - Expression inferenceId = instance.inferenceId(); - if (randomBoolean()) { - inputText = randomValueOtherThan(inputText, AbstractExpressionSerializationTests::randomChild); - } else { - inferenceId = randomValueOtherThan(inferenceId, AbstractExpressionSerializationTests::randomChild); - } - return new TextEmbedding(source, inputText, inferenceId); - } -} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java index 7b4d3b171d4cd..85c3ee1fd549c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java @@ -9,7 +9,6 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; - import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -61,4 +60,9 @@ public static Iterable parameters() { protected Expression build(Source source, List args) { return new TextEmbedding(source, args.get(0), args.get(1)); } + + @Override + protected boolean canSerialize() { + return false; + } } From 5b562325b77e9a3eb633ce36839ab96185ec8132 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 24 Sep 2025 09:15:24 +0000 Subject: [PATCH 23/26] [CI] Auto commit changes from spotless --- .../esql/expression/function/inference/TextEmbeddingTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java index 85c3ee1fd549c..87d3dd28e6107 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java @@ -9,6 +9,7 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; From 128688beb3f0e9fe0ff2e786672e1d5466452ce5 Mon Sep 17 00:00:00 2001 From: afoucret Date: Wed, 24 Sep 2025 11:34:30 +0200 Subject: [PATCH 24/26] Remove failing tests as it is failing. --- .../inference/TextEmbeddingTests.java | 69 ------------------- 1 file changed, 69 deletions(-) delete mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java deleted file mode 100644 index 87d3dd28e6107..0000000000000 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingTests.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.expression.function.inference; - -import com.carrotsearch.randomizedtesting.annotations.Name; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; - -import org.elasticsearch.xpack.esql.action.EsqlCapabilities; -import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.tree.Source; -import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; -import org.elasticsearch.xpack.esql.expression.function.FunctionName; -import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; -import org.hamcrest.Matchers; -import org.junit.Before; - -import java.util.List; -import java.util.function.Supplier; - -import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; -import static org.hamcrest.Matchers.equalTo; - -@FunctionName("text_embedding") -public class TextEmbeddingTests extends AbstractFunctionTestCase { - @Before - public void checkCapability() { - assumeTrue("TEXT_EMBEDDING is not enabled", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); - } - - public TextEmbeddingTests(@Name("TestCase") Supplier testCaseSupplier) { - this.testCase = testCaseSupplier.get(); - } - - @ParametersFactory - public static Iterable parameters() { - return parameterSuppliersFromTypedData( - List.of( - new TestCaseSupplier( - List.of(DataType.KEYWORD, DataType.KEYWORD), - () -> new TestCaseSupplier.TestCase( - List.of( - new TestCaseSupplier.TypedData(randomBytesReference(10).toBytesRef(), DataType.KEYWORD, "text"), - new TestCaseSupplier.TypedData(randomBytesReference(10).toBytesRef(), DataType.KEYWORD, "inference_id") - ), - Matchers.blankOrNullString(), - DENSE_VECTOR, - equalTo(true) - ) - ) - ) - ); - } - - @Override - protected Expression build(Source source, List args) { - return new TextEmbedding(source, args.get(0), args.get(1)); - } - - @Override - protected boolean canSerialize() { - return false; - } -} From a8b99c4600889b724c9575504351554e29fa709a Mon Sep 17 00:00:00 2001 From: afoucret Date: Wed, 24 Sep 2025 12:18:35 +0200 Subject: [PATCH 25/26] Remove start import --- .../esql/expression/function/inference/TextEmbedding.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java index 0c7debcd2a58d..ab8b8c8c3f3c9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbedding.java @@ -14,7 +14,11 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.function.*; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; import java.io.IOException; import java.util.List; From a0cc9653e76427c9ad7b9ba3b932bcb5960200b7 Mon Sep 17 00:00:00 2001 From: afoucret Date: Wed, 24 Sep 2025 13:25:53 +0200 Subject: [PATCH 26/26] Remove test --- .../inference/TextEmbeddingErrorTests.java | 74 ------------------- 1 file changed, 74 deletions(-) delete mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingErrorTests.java diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingErrorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingErrorTests.java deleted file mode 100644 index 9af017bd5207f..0000000000000 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/inference/TextEmbeddingErrorTests.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.expression.function.inference; - -import org.elasticsearch.xpack.esql.action.EsqlCapabilities; -import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; -import org.elasticsearch.xpack.esql.core.tree.Source; -import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.function.AbstractFunctionTestCase; -import org.elasticsearch.xpack.esql.expression.function.ErrorsForCasesWithoutExamplesTestCase; -import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; -import org.hamcrest.Matcher; -import org.junit.Before; - -import java.util.List; -import java.util.Locale; -import java.util.Set; - -import static org.hamcrest.Matchers.equalTo; - -/** Tests error conditions and type validation for TEXT_EMBEDDING function. */ -public class TextEmbeddingErrorTests extends ErrorsForCasesWithoutExamplesTestCase { - - @Before - public void checkCapability() { - assumeTrue("TEXT_EMBEDDING is not enabled", EsqlCapabilities.Cap.TEXT_EMBEDDING_FUNCTION.isEnabled()); - } - - @Override - protected List cases() { - return paramsToSuppliers(TextEmbeddingTests.parameters()); - } - - @Override - protected Expression build(Source source, List args) { - return new TextEmbedding(source, args.get(0), args.get(1)); - } - - @Override - protected Matcher expectedTypeErrorMatcher(List> validPerPosition, List signature) { - return equalTo(typeErrorMessage(true, validPerPosition, signature, (v, p) -> "string")); - } - - protected static String typeErrorMessage( - boolean includeOrdinal, - List> validPerPosition, - List signature, - AbstractFunctionTestCase.PositionalErrorMessageSupplier positionalErrorMessageSupplier - ) { - for (int i = 0; i < signature.size(); i++) { - if (signature.get(i) == DataType.NULL) { - String ordinal = includeOrdinal ? TypeResolutions.ParamOrdinal.fromIndex(i).name().toLowerCase(Locale.ROOT) + " " : ""; - return ordinal + "argument of [" + sourceForSignature(signature) + "] cannot be null, received []"; - } - - if (validPerPosition.get(i).contains(signature.get(i)) == false) { - break; - } - } - - return ErrorsForCasesWithoutExamplesTestCase.typeErrorMessage( - includeOrdinal, - validPerPosition, - signature, - positionalErrorMessageSupplier - ); - } -}