diff --git a/docs/changelog/137923.yaml b/docs/changelog/137923.yaml new file mode 100644 index 0000000000000..6b6989d3de32a --- /dev/null +++ b/docs/changelog/137923.yaml @@ -0,0 +1,5 @@ +pr: 137923 +summary: Propagate converted fields through projections +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index 5b9da2f13aa59..6e3011f0f5b7c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -2548,3 +2548,25 @@ true | 1990-08-01T00:00:00.000Z true | 1990-09-01T00:00:00.000Z true | 1990-12-01T00:00:00.000Z ; + +unionTypesResolvePastProjections +required_capability: union_types +required_capability: casting_operator +required_capability: union_types_resolve_past_projections + +FROM apps, apps_short +| KEEP id, name +| MV_EXPAND name +| EVAL id = id::integer +| KEEP id, name +| SORT id ASC, name ASC +| LIMIT 5 +; + +id:integer | name:keyword +1 | aaaaa +1 | aaaaa +2 | bbbbb +2 | bbbbb +3 | ccccc +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index aaa0fa504a545..08d6f3f303fd4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -552,6 +552,13 @@ public enum Cap { */ UNION_TYPES_NUMERIC_WIDENING, + /** + * Fix for resolving union type casts past projections (KEEP) and MV_EXPAND operations. + * Ensures that casting a union type field works correctly when the field has been projected + * and expanded through MV_EXPAND. See #137923 + */ + UNION_TYPES_RESOLVE_PAST_PROJECTIONS, + /** * Fix a parsing issue where numbers below Long.MIN_VALUE threw an exception instead of parsing as doubles. * see Parsing large numbers is inconsistent #104323 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 418dc2fa8734b..3ee35debabab5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -2109,7 +2109,7 @@ private LogicalPlan doRule(LogicalPlan plan, List u * and thereby get used in FieldExtractExec */ private static LogicalPlan addGeneratedFieldsToEsRelations(LogicalPlan plan, List unionFieldAttributes) { - return plan.transformDown(EsRelation.class, esr -> { + var res = plan.transformDown(EsRelation.class, esr -> { List missing = new ArrayList<>(); for (FieldAttribute fa : unionFieldAttributes) { // Using outputSet().contains looks by NameId, resp. uses semanticEquals. @@ -2123,6 +2123,25 @@ private static LogicalPlan addGeneratedFieldsToEsRelations(LogicalPlan plan, Lis } return esr; }); + if (res.equals(plan) == false) { + res = res.transformUp(Project.class, p -> { + List syntheticAttributesToCarryOver = new ArrayList<>(); + for (Attribute attr : p.inputSet()) { + if (attr.synthetic() && p.outputSet().contains(attr) == false) { + syntheticAttributesToCarryOver.add(attr); + } + } + + if (syntheticAttributesToCarryOver.isEmpty()) { + return p; + } + + List newProjections = new ArrayList<>(p.projections()); + newProjections.addAll(syntheticAttributesToCarryOver); + return new Project(p.source(), p.child(), newProjections); + }); + } + return res; } private Expression resolveConvertFunction(ConvertFunction convert, List unionFieldAttributes) { @@ -2286,16 +2305,17 @@ private static Expression typeSpecificConvert(ConvertFunction convert, Source so */ private static class UnionTypesCleanup extends Rule { public LogicalPlan apply(LogicalPlan plan) { - LogicalPlan planWithCheckedUnionTypes = plan.transformUp( + + // We start by dropping synthetic attributes if the plan is resolved + LogicalPlan cleanPlan = plan.resolved() ? planWithoutSyntheticAttributes(plan) : plan; + + // If not, we apply checkUnresolved to the field attributes of the original plan, resulting in unsupported attributes + // This removes attributes such as converted types if they are aliased, but retains them otherwise, while also guaranteeing that + // unsupported / unresolved fields can be explicitly retained + return cleanPlan.transformUp( LogicalPlan.class, p -> p.transformExpressionsOnly(FieldAttribute.class, UnionTypesCleanup::checkUnresolved) ); - - // To drop synthetic attributes at the end, we need to compute the plan's output. - // This is only legal to do if the plan is resolved. - return planWithCheckedUnionTypes.resolved() - ? planWithoutSyntheticAttributes(planWithCheckedUnionTypes) - : planWithCheckedUnionTypes; } static Attribute checkUnresolved(FieldAttribute fa) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index fa5c6b3c3c610..df34af303435d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -119,6 +119,7 @@ import java.nio.charset.StandardCharsets; import java.time.Period; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -4510,6 +4511,107 @@ public void testBucketWithIntervalInStringInGroupingReferencedInAggregation() { assertEquals(oneYear, literal); } + public void testProjectionForUnionTypeResolution() { + LinkedHashMap> typesToIndices = new LinkedHashMap<>(); + typesToIndices.put("keyword", Set.of("union_index_1")); + typesToIndices.put("integer", Set.of("union_index_2")); + + EsField idField = new InvalidMappedField("id", typesToIndices); + EsField fooField = new EsField("foo", DataType.KEYWORD, Map.of(), true, EsField.TimeSeriesFieldType.NONE); + + EsIndex index = new EsIndex( + "union_index*", + Map.of("id", idField, "foo", fooField), // Updated mapping keys + Map.of("union_index_1", IndexMode.STANDARD, "union_index_2", IndexMode.STANDARD), + Map.of(), + Map.of(), + Set.of() + ); + IndexResolution resolution = IndexResolution.valid(index); + Analyzer analyzer = analyzer(resolution); + + String query = "FROM union_index* | KEEP id, foo | MV_EXPAND foo | EVAL id = id::keyword"; + LogicalPlan plan = analyze(query, analyzer); + + Project project = as(plan, Project.class); + Eval eval = as(project.child().children().getFirst(), Eval.class); + FieldAttribute convertedFa = as(eval.output().get(1), FieldAttribute.class); + + // The synthetic field used for the conversion should be propagated through intermediate nodes (like MV_EXPAND) but ultimately + // stripped from the final output, leaving only the aliased 'id' and 'foo'. + verifyNameAndType(convertedFa.name(), convertedFa.dataType(), "$$id$converted_to$keyword", KEYWORD); + + eval.forEachDown(Project.class, p -> { + if (p.inputSet().contains(convertedFa)) { + assertTrue(p.outputSet().contains(convertedFa)); + } + }); + + var output = plan.output(); + assertThat(output, hasSize(2)); + + var fooAttr = output.getFirst(); + var idAttr = output.getLast(); + assertThat(fooAttr.dataType(), equalTo(KEYWORD)); + assertThat(idAttr.dataType(), equalTo(KEYWORD)); + assertThat(idAttr.name(), equalTo("id")); + } + + public void testExplicitRetainOriginalFieldWithCast() { + // Use the existing union index fixture (id has keyword/integer union types) + LinkedHashMap> typesToIndices = new LinkedHashMap<>(); + typesToIndices.put("keyword", Set.of("test1")); + typesToIndices.put("integer", Set.of("test2")); + EsField idField = new InvalidMappedField("id", typesToIndices); + EsIndex index = new EsIndex( + "union_index*", + Map.of("id", idField), + Map.of("test1", IndexMode.STANDARD, "test2", IndexMode.STANDARD), + Map.of(), + Map.of(), + Set.of() + ); + IndexResolution resolution = IndexResolution.valid(index); + Analyzer analyzer = analyzer(resolution); + + String query = """ + FROM union_index* + | KEEP id + | EVAL x = id::long + """; + LogicalPlan plan = analyze(query, analyzer); + + Project topProject = as(plan, Project.class); + var projections = topProject.projections(); + assertThat(projections, hasSize(2)); + assertThat(projections.get(0).name(), equalTo("id")); + assertThat(projections.get(0).dataType(), equalTo(UNSUPPORTED)); + + ReferenceAttribute xRef = as(projections.get(1), ReferenceAttribute.class); + assertThat(xRef.name(), equalTo("x")); + assertThat(xRef.dataType(), equalTo(LONG)); + + Limit limit = as(topProject.child(), Limit.class); + Eval eval = as(limit.child(), Eval.class); + Alias xAlias = as(eval.fields().get(0), Alias.class); + assertThat(xAlias.name(), equalTo("x")); + FieldAttribute syntheticFieldAttr = as(xAlias.child(), FieldAttribute.class); + assertThat(syntheticFieldAttr.name(), equalTo("$$id$converted_to$long")); + assertThat(xRef, is(xAlias.toAttribute())); + + Project innerProject = as(eval.child(), Project.class); + EsRelation relation = as(innerProject.child(), EsRelation.class); + assertEquals("union_index*", relation.indexPattern()); + var relationOutput = relation.output(); + assertThat(relationOutput, hasSize(2)); + assertThat(relationOutput.get(0).name(), equalTo("id")); + assertThat(relationOutput.get(0).dataType(), equalTo(UNSUPPORTED)); + var syntheticField = relationOutput.get(1); + assertThat(syntheticField.name(), equalTo("$$id$converted_to$long")); + assertThat(syntheticField.dataType(), equalTo(LONG)); + assertThat(syntheticFieldAttr.id(), equalTo(syntheticField.id())); + } + public void testImplicitCastingForDateAndDateNanosFields() { IndexResolution indexWithUnionTypedFields = indexWithDateDateNanosUnionType(); Analyzer analyzer = AnalyzerTestUtils.analyzer(indexWithUnionTypedFields); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java index 02ee388ea6128..0e31996ed901c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/AbstractLogicalPlanOptimizerTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.xpack.esql.analysis.EnrichResolution; import org.elasticsearch.xpack.esql.analysis.MutableAnalyzerContext; import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.EsIndexGenerator; @@ -26,6 +27,7 @@ import org.junit.BeforeClass; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -64,6 +66,7 @@ public abstract class AbstractLogicalPlanOptimizerTests extends ESTestCase { protected static Map metricMapping; protected static Analyzer metricsAnalyzer; protected static Analyzer multiIndexAnalyzer; + protected static Analyzer unionIndexAnalyzer; protected static Analyzer sampleDataIndexAnalyzer; protected static Analyzer subqueryAnalyzer; protected static Map mappingBaseConversion; @@ -217,6 +220,32 @@ public static void init() { TEST_VERIFIER ); + // Create a union index with conflicting types (keyword vs integer) for field 'id' + LinkedHashMap> typesToIndices = new LinkedHashMap<>(); + typesToIndices.put("keyword", Set.of("test1")); + typesToIndices.put("integer", Set.of("test2")); + EsField idField = new InvalidMappedField("id", typesToIndices); + EsField fooField = new EsField("foo", KEYWORD, emptyMap(), true, EsField.TimeSeriesFieldType.NONE); + var unionIndex = new EsIndex( + "union_index*", + Map.of("id", idField, "foo", fooField), + Map.of("test1", IndexMode.STANDARD, "test2", IndexMode.STANDARD), + Map.of(), + Map.of(), + Set.of() + ); + unionIndexAnalyzer = new Analyzer( + testAnalyzerContext( + EsqlTestUtils.TEST_CFG, + new EsqlFunctionRegistry(), + indexResolutions(unionIndex), + defaultLookupResolution(), + enrichResolution, + emptyInferenceResolution() + ), + TEST_VERIFIER + ); + var sampleDataMapping = loadMapping("mapping-sample_data.json"); var sampleDataIndex = new EsIndex( "sample_data", @@ -313,6 +342,10 @@ protected LogicalPlan planMultiIndex(String query) { return logicalOptimizer.optimize(multiIndexAnalyzer.analyze(parser.parseQuery(query))); } + protected LogicalPlan planUnionIndex(String query) { + return logicalOptimizer.optimize(unionIndexAnalyzer.analyze(parser.parseQuery(query))); + } + protected LogicalPlan planSample(String query) { var analyzed = sampleDataIndexAnalyzer.analyze(parser.parseQuery(query)); return logicalOptimizer.optimize(analyzed); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 5d3a44bf4f956..c39034ab4d239 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -203,7 +203,9 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; +import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.EQ; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GT; import static org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.EsqlBinaryComparison.BinaryComparisonOperation.GTE; @@ -9855,6 +9857,97 @@ public void testFullTextFunctionOnEvalNull() { assertEquals("test", relation.indexPattern()); } + /** + * Project[[foo{r}#12, $$id$converted_to$keyword{f$}#13 AS id#9]] + * \_Limit[1000[INTEGER],true,false] + * \_MvExpand[foo{f}#11,foo{r}#12] + * \_Project[[!id, foo{f}#11, $$id$converted_to$keyword{f$}#13]] + * \_Limit[1000[INTEGER],false,false] + * \_EsRelation[union_index*][foo{f}#11, !id, $$id$converted_to$keyword{f$}#13] + */ + public void testUnionTypesResolvePastProjections() { + LogicalPlan plan = planUnionIndex(""" + FROM union_index* + | KEEP id, foo + | MV_EXPAND foo + | EVAL id = id::keyword + """); + + Project topProject = as(plan, Project.class); + var topOutput = topProject.output(); + assertThat(topOutput, hasSize(2)); + + var idAttr = topOutput.get(1); + assertThat(idAttr.name(), equalTo("id")); + + // The id attribute should be a ReferenceAttribute that references the converted field + ReferenceAttribute idRef = as(idAttr, ReferenceAttribute.class); + assertThat(idRef.dataType(), equalTo(KEYWORD)); + + Limit limit1 = asLimit(topProject.child(), 1000, true); + MvExpand mvExpand = as(limit1.child(), MvExpand.class); + + Project innerProject = as(mvExpand.child(), Project.class); + var innerOutput = innerProject.output(); + assertThat(innerOutput, hasSize(3)); + assertThat(Expressions.names(innerOutput), containsInAnyOrder("id", "foo", "$$id$converted_to$keyword")); + + Limit limit2 = asLimit(innerProject.child(), 1000, false); + EsRelation relation = as(limit2.child(), EsRelation.class); + assertEquals("union_index*", relation.indexPattern()); + + var relationOutput = relation.output(); + assertThat(relationOutput, hasSize(3)); + + assertThat(relationOutput.get(0).name(), equalTo("foo")); + assertThat(relationOutput.get(0).dataType(), equalTo(KEYWORD)); + + assertThat(relationOutput.get(1).name(), equalTo("id")); + assertThat(relationOutput.get(1).dataType(), equalTo(UNSUPPORTED)); + + assertThat(relationOutput.get(2).name(), equalTo("$$id$converted_to$keyword")); + assertThat(relationOutput.get(2).dataType(), equalTo(KEYWORD)); + } + + /** + * Project[[!id, $$id$converted_to$long{f$}#9 AS x#6]] + * \_Limit[1000[INTEGER],false,false] + * \_EsRelation[union_index*][foo{f}#7, !id, $$id$converted_to$long{f$}#9] + */ + public void testExplicitRetainOriginalFieldWithCast() { + LogicalPlan plan = planUnionIndex(""" + FROM union_index* + | KEEP id + | EVAL x = id::long + """); + + Project topProject = as(plan, Project.class); + var projections = topProject.projections(); + assertThat(projections, hasSize(2)); + assertThat(projections.get(0).name(), equalTo("id")); + assertThat(projections.get(0).dataType(), equalTo(UNSUPPORTED)); + + Alias xAlias = as(projections.get(1), Alias.class); + assertThat(xAlias.name(), equalTo("x")); + assertThat(xAlias.dataType(), equalTo(LONG)); + FieldAttribute syntheticFieldAttr = as(xAlias.child(), FieldAttribute.class); + assertThat(syntheticFieldAttr.name(), equalTo("$$id$converted_to$long")); + ReferenceAttribute xRef = as(topProject.output().get(1), ReferenceAttribute.class); + assertThat(xRef, is(xAlias.toAttribute())); + + Limit limit = asLimit(topProject.child(), 1000, false); + EsRelation relation = as(limit.child(), EsRelation.class); + assertEquals("union_index*", relation.indexPattern()); + var relationOutput = relation.output(); + assertThat(relationOutput, hasSize(3)); + assertThat(relationOutput.get(1).name(), equalTo("id")); + assertThat(relationOutput.get(1).dataType(), equalTo(UNSUPPORTED)); + var syntheticField = relationOutput.get(2); + assertThat(syntheticField.name(), equalTo("$$id$converted_to$long")); + assertThat(syntheticField.dataType(), equalTo(LONG)); + assertThat(syntheticFieldAttr.id(), equalTo(syntheticField.id())); + } + /* * Renaming or shadowing the @timestamp field prior to running stats with TS command is not allowed. */