diff --git a/docs/changelog/110488.yaml b/docs/changelog/110488.yaml new file mode 100644 index 0000000000000..fbb439f20fc96 --- /dev/null +++ b/docs/changelog/110488.yaml @@ -0,0 +1,6 @@ +pr: 110488 +summary: "ESQL: Validate unique plan attribute names" +area: ES|QL +type: bug +issues: + - 110541 diff --git a/docs/reference/esql/processing-commands/dissect.asciidoc b/docs/reference/esql/processing-commands/dissect.asciidoc index 72c811a318a5d..82138aa238087 100644 --- a/docs/reference/esql/processing-commands/dissect.asciidoc +++ b/docs/reference/esql/processing-commands/dissect.asciidoc @@ -20,6 +20,8 @@ multiple values, `DISSECT` will process each value. `pattern`:: A <>. +If a field name conflicts with an existing column, the existing column is dropped. +If a field name is used more than once, only the rightmost duplicate creates a column. ``:: A string used as the separator between appended values, when using the <>. diff --git a/docs/reference/esql/processing-commands/enrich.asciidoc b/docs/reference/esql/processing-commands/enrich.asciidoc index f34e77dbf5c23..2ece5a63e7570 100644 --- a/docs/reference/esql/processing-commands/enrich.asciidoc +++ b/docs/reference/esql/processing-commands/enrich.asciidoc @@ -31,11 +31,16 @@ name as the `match_field` defined in the <>. The enrich fields from the enrich index that are added to the result as new columns. If a column with the same name as the enrich field already exists, the existing column will be replaced by the new column. If not specified, each of -the enrich fields defined in the policy is added +the enrich fields defined in the policy is added. +A column with the same name as the enrich field will be dropped unless the +enrich field is renamed. `new_nameX`:: Enables you to change the name of the column that's added for each of the enrich fields. Defaults to the enrich field name. +If a column has the same name as the new name, it will be discarded. +If a name (new or original) occurs more than once, only the rightmost duplicate +creates a new column. *Description* diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc index f77249736c1b3..00a7764d24004 100644 --- a/docs/reference/esql/processing-commands/eval.asciidoc +++ b/docs/reference/esql/processing-commands/eval.asciidoc @@ -16,10 +16,12 @@ EVAL [column1 =] value1[, ..., [columnN =] valueN] `columnX`:: The column name. +If a column with the same name already exists, the existing column is dropped. +If a column name is used more than once, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a -<>. +<>. Can use columns defined left of this one. *Description* diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc index d631d17f7a42c..57c55a5bad53f 100644 --- a/docs/reference/esql/processing-commands/grok.asciidoc +++ b/docs/reference/esql/processing-commands/grok.asciidoc @@ -20,6 +20,9 @@ multiple values, `GROK` will process each value. `pattern`:: A grok pattern. +If a field name conflicts with an existing column, the existing column is discarded. +If a field name is used more than once, a multi-valued column will be created with one value +per each occurrence of the field name. *Description* @@ -67,4 +70,16 @@ include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime] |=== include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime-result] |=== + +If a field name is used more than once, `GROK` creates a multi-valued +column: + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames-result] +|=== // end::examples[] diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 468f459411640..3dbd0c69d8222 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -16,6 +16,8 @@ KEEP columns `columns`:: A comma-separated list of columns to keep. Supports wildcards. +See below for the behavior in case an existing column matches multiple +given wildcards or column names. *Description* @@ -29,7 +31,7 @@ Fields are added in the order they appear. If one field matches multiple express 2. Partial wildcard expressions (for example: `fieldNam*`) 3. Wildcard only (`*`) -If a field matches two expressions with the same precedence, the right-most expression wins. +If a field matches two expressions with the same precedence, the rightmost expression wins. Refer to the examples for illustrations of these precedence rules. diff --git a/docs/reference/esql/processing-commands/lookup.asciidoc b/docs/reference/esql/processing-commands/lookup.asciidoc index 426527bf4d2d6..7bb3a5791deef 100644 --- a/docs/reference/esql/processing-commands/lookup.asciidoc +++ b/docs/reference/esql/processing-commands/lookup.asciidoc @@ -18,6 +18,7 @@ LOOKUP table ON match_field1[, match_field2, ...] `table`:: The name of the `table` provided in the request to match. +If the table's column names conflict with existing columns, the existing columns will be dropped. `match_field`:: The fields in the input to match against the table. diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc index 8507a826f085d..41e2ce9298ae8 100644 --- a/docs/reference/esql/processing-commands/rename.asciidoc +++ b/docs/reference/esql/processing-commands/rename.asciidoc @@ -17,7 +17,9 @@ RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN] The name of a column you want to rename. `new_nameX`:: -The new name of the column. +The new name of the column. If it conflicts with an existing column name, +the existing column is dropped. If multiple columns are renamed to the same +name, all but the rightmost column with the same new name are dropped. *Description* diff --git a/docs/reference/esql/processing-commands/stats.asciidoc b/docs/reference/esql/processing-commands/stats.asciidoc index 34ae81fd5414e..7377522a93201 100644 --- a/docs/reference/esql/processing-commands/stats.asciidoc +++ b/docs/reference/esql/processing-commands/stats.asciidoc @@ -18,12 +18,15 @@ STATS [column1 =] expression1[, ..., [columnN =] expressionN] `columnX`:: The name by which the aggregated value is returned. If omitted, the name is equal to the corresponding expression (`expressionX`). +If multiple columns have the same name, all but the rightmost column with this +name will be ignored. `expressionX`:: An expression that computes an aggregated value. `grouping_expressionX`:: An expression that outputs the values to group by. +If its name coincides with one of the computed columns, that column will be ignored. NOTE: Individual `null` values are skipped when computing aggregations. diff --git a/docs/reference/esql/source-commands/row.asciidoc b/docs/reference/esql/source-commands/row.asciidoc index 5c81d67c4ac22..28a4f29ae9a5b 100644 --- a/docs/reference/esql/source-commands/row.asciidoc +++ b/docs/reference/esql/source-commands/row.asciidoc @@ -16,6 +16,7 @@ ROW column1 = value1[, ..., columnN = valueN] `columnX`:: The column name. +In case of duplicate column names, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 530b2bc01b3d6..f9b768d67d574 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -96,8 +96,8 @@ public class CsvTestsDataLoader { "cartesian_multipolygons.csv" ); private static final TestsDataset DISTANCES = new TestsDataset("distances", "mapping-distances.json", "distances.csv"); - private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv", "k8s-settings.json", true); + private static final TestsDataset ADDRESSES = new TestsDataset("addresses", "mapping-addresses.json", "addresses.csv", null, true); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -121,7 +121,8 @@ public class CsvTestsDataLoader { Map.entry(AIRPORT_CITY_BOUNDARIES.indexName, AIRPORT_CITY_BOUNDARIES), Map.entry(CARTESIAN_MULTIPOLYGONS.indexName, CARTESIAN_MULTIPOLYGONS), Map.entry(K8S.indexName, K8S), - Map.entry(DISTANCES.indexName, DISTANCES) + Map.entry(DISTANCES.indexName, DISTANCES), + Map.entry(ADDRESSES.indexName, ADDRESSES) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv new file mode 100644 index 0000000000000..0eea102400d60 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/addresses.csv @@ -0,0 +1,4 @@ +street:keyword,number:keyword,zip_code:keyword,city.name:keyword,city.country.name:keyword,city.country.continent.name:keyword,city.country.continent.planet.name:keyword,city.country.continent.planet.galaxy:keyword +Keizersgracht,281,1016 ED,Amsterdam,Netherlands,Europe,Earth,Milky Way +Kearny St,88,CA 94108,San Francisco,United States of America,North America,Earth,Milky Way +Marunouchi,2-7-2,100-7014,Tokyo,Japan,Asia,Earth,Milky Way diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec index 812198c324217..8c4e797b7982d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dissect.csv-spec @@ -26,6 +26,19 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| DISSECT city.name "%{city.country.continent.planet.name} %{?}" +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | null +United States of America | San Francisco | San +Japan | Tokyo | null +; + shadowingSelf FROM employees | KEEP first_name, last_name @@ -50,6 +63,18 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello ; +shadowingInternal +FROM employees +| KEEP first_name, last_name +| WHERE last_name == "Facello" +| EVAL name = concat(first_name, "1 ", last_name) +| DISSECT name "%{foo} %{foo}" +; + +first_name:keyword | last_name:keyword | name:keyword | foo:keyword +Georgi | Facello | Georgi1 Facello | Facello +; + complexPattern ROW a = "1953-01-23T12:15:00Z - some text - 127.0.0.1;" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index d34620a9e118d..15fe6853ae491 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -436,6 +436,23 @@ ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected" // end::grokWithEscape-result[] ; +grokWithDuplicateFieldNames +// tag::grokWithDuplicateFieldNames[] +FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +// end::grokWithDuplicateFieldNames[] +| SORT city.name +; + +// tag::grokWithDuplicateFieldNames-result[] +city.name:keyword | zip_code:keyword | zip_parts:keyword +Amsterdam | 1016 ED | ["1016", "ED"] +San Francisco | CA 94108 | ["CA", "94108"] +Tokyo | 100-7014 | null +// end::grokWithDuplicateFieldNames-result[] +; + basicDissect // tag::basicDissect[] ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec index 35530cf6fdb8e..9886d6cce0ca2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/drop.csv-spec @@ -122,3 +122,53 @@ FROM employees | STATS COUNT(*), MIN(salary * 10), MAX(languages)| DROP `COUNT( MIN(salary * 10):i | MAX(languages):i 253240 | 5 ; + +// Not really shadowing, but let's keep the name consistent with the other command's tests +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| DROP last_name, last_name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +shadowingInternalWildcard +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| DROP last*name, last*name, last*, last_name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +subfields +FROM addresses +| DROP city.country.continent.planet.name, city.country.continent.name, city.country.name, number, street, zip_code, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.planet.galaxy:keyword | city.name:keyword +Milky Way | Amsterdam +Milky Way | San Francisco +Milky Way | Tokyo +; + +subfieldsWildcard +FROM addresses +| DROP *.name, number, street, zip_code, *ame +; + +city.country.continent.planet.galaxy:keyword +Milky Way +Milky Way +Milky Way +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec index fc8c48afdf8cc..cf32e028b23bc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/enrich.csv-spec @@ -69,6 +69,34 @@ ROW left = "left", foo = "foo", client_ip = "172.21.0.5", env = "env", right = " left:keyword | client_ip:keyword | env:keyword | right:keyword | foo:keyword ; +shadowingSubfields +required_capability: enrich_load +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") +| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text +Netherlands | Amsterdam | null +United States of America | South San Francisco | San Francisco Int'l +Japan | Tokyo | null +; + +shadowingSubfieldsLimit0 +required_capability: enrich_load +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco") +| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport +| SORT city.name +| LIMIT 0 +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text +; + shadowingSelf required_capability: enrich_load ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" @@ -107,6 +135,46 @@ ROW left = "left", airport = "Zurich Airport ZRH", city = "Zürich", middle = "m left:keyword | city:keyword | middle:keyword | right:keyword | airport:text | region:text | city_boundary:geo_shape ; +shadowingInternal +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH x = airport, x = region +; + +city:keyword | x:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport = region +; + +city:keyword | airport:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit2 +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport, airport = region +; + +city:keyword | airport:text +Zürich | Bezirk Zürich +; + +shadowingInternalImplicit3 +required_capability: enrich_load +ROW city = "Zürich" +| ENRICH city_names ON city WITH airport = region, airport +; + +city:keyword | airport:text +Zürich | Zurich Int'l +; + simple required_capability: enrich_load diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec index 3df3b85e5e3af..87f54fbf0f174 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/eval.csv-spec @@ -15,6 +15,19 @@ left:keyword | right:keyword | x:integer left | right | 1 ; +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| EVAL city.country.continent.planet.name = to_upper(city.country.continent.planet.name) +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | EARTH +United States of America | San Francisco | EARTH +Japan | Tokyo | EARTH +; + shadowingSelf ROW left = "left", x = 10000 , right = "right" | EVAL x = x + 1 @@ -33,6 +46,16 @@ left:keyword | middle:keyword | right:keyword | x:integer | y:integer left | middle | right | 9 | 10 ; +shadowingInternal +ROW x = 10000 +| EVAL x = x + 1, x = x - 2 +; + +x:integer +9999 +; + + withMath row a = 1 | eval b = 2 + 3; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 9d574eed7be6b..d9857e8c122ef 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -26,6 +26,19 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam Georgi | left | Georgi Facello | right | Facello ; +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.name, city.name +| GROK city.name "%{WORD:city.country.continent.planet.name} %{WORD}" +| SORT city.name +; + +city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam | null +United States of America | San Francisco | San +Japan | Tokyo | null +; + shadowingSelf FROM employees | KEEP first_name, last_name @@ -50,6 +63,18 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello ; +shadowingInternal +FROM addresses +| KEEP city.name, zip_code +| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}" +| SORT city.name +; + +city.name:keyword | zip_code:keyword | zip_parts:keyword +Amsterdam | 1016 ED | ["1016", "ED"] +San Francisco | CA 94108 | ["CA", "94108"] +Tokyo | 100-7014 | null +; complexPattern ROW a = "1953-01-23T12:15:00Z 127.0.0.1 some.email@foo.com 42" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec index 14a3807b8729c..bcce35eb81e0f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/keep.csv-spec @@ -539,3 +539,63 @@ c:i 1 1 ; + +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP last_name, emp_no, last_name +| LIMIT 2 +; + +emp_no:integer | last_name:keyword + 10001 | Facello + 10002 | Simmel +; + +shadowingInternalWildcard +FROM employees +| SORT emp_no ASC +| KEEP last*name, emp_no, last*name, first_name, last*, gender, last* +| LIMIT 2 +; + +emp_no:integer | first_name:keyword | gender:keyword | last_name:keyword + 10001 | Georgi | M | Facello + 10002 | Bezalel | F | Simmel +; + +shadowingInternalWildcardAndExplicit +FROM employees +| SORT emp_no ASC +| KEEP last*name, emp_no, last_name, first_name, last*, languages, last_name, gender, last*name +| LIMIT 2 +; + +emp_no:integer | first_name:keyword | languages:integer | last_name:keyword | gender:keyword + 10001 | Georgi | 2 | Facello | M + 10002 | Bezalel | 5 | Simmel | F +; + +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.name:keyword | city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Europe | Netherlands | Amsterdam | Earth +North America | United States of America | San Francisco | Earth +Asia | Japan | Tokyo | Earth +; + +shadowingSubfieldsWildcard +FROM addresses +| KEEP *name, city.country.continent.planet.name +| SORT city.name +; + +city.country.continent.name:keyword | city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword +Europe | Netherlands | Amsterdam | Earth +North America | United States of America | San Francisco | Earth +Asia | Japan | Tokyo | Earth +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json new file mode 100644 index 0000000000000..679efb3c8d38b --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-addresses.json @@ -0,0 +1,44 @@ +{ + "properties" : { + "street" : { + "type": "keyword" + }, + "number" : { + "type": "keyword" + }, + "zip_code": { + "type": "keyword" + }, + "city" : { + "properties": { + "name": { + "type": "keyword" + }, + "country": { + "properties": { + "name": { + "type": "keyword" + }, + "continent": { + "properties": { + "name": { + "type": "keyword" + }, + "planet": { + "properties": { + "name": { + "type": "keyword" + }, + "galaxy": { + "type": "keyword" + } + } + } + } + } + } + } + } + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec index 1e830486cc7c7..ca4c627cae749 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/rename.csv-spec @@ -174,3 +174,42 @@ avg_worked_seconds:l | birth_date:date | emp_no:i | first_n 341158890 | 1961-10-15T00:00:00.000Z | 10060 | Breannda | M | 1.42 | 1.4199999570846558 | 1.419921875 | 1.42 | 1987-11-02T00:00:00.000Z | [false, false, false, true]| [Business Analyst, Data Scientist, Senior Team Lead] | 2 | 2 | 2 | 2 | Billingsley | 29175 | [-1.76, -0.85] | [-1, 0] | [-0.85, -1.76] | [-1, 0] | true | 29175 246355863 | null | 10042 | Magy | F | 1.44 | 1.440000057220459 | 1.4404296875 | 1.44 | 1993-03-21T00:00:00.000Z | null | [Architect, Business Analyst, Internship, Junior Developer] | 3 | 3 | 3 | 3 | Stamatiou | 30404 | [-9.28, 9.42] | [-9, 9] | [-9.28, 9.42] | [-9, 9] | true | 30404 ; + +shadowing +FROM employees +| SORT emp_no ASC +| KEEP emp_no, first_name, last_name +| RENAME emp_no AS last_name +| LIMIT 2 +; + +last_name:integer | first_name:keyword + 10001 | Georgi + 10002 | Bezalel +; + +shadowingSubfields +FROM addresses +| KEEP city.country.continent.planet.name, city.country.continent.name, city.country.name, city.name +| RENAME city.name AS city.country.continent.planet.name, city.country.name AS city.country.continent.name +| SORT city.country.continent.planet.name +; + +city.country.continent.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Amsterdam +United States of America | San Francisco +Japan | Tokyo +; + +shadowingInternal +FROM employees +| SORT emp_no ASC +| KEEP emp_no, last_name +| RENAME emp_no AS x, last_name AS x +| LIMIT 2 +; + +x:keyword +Facello +Simmel +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec index bb1cf7358ca74..da640b6306299 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/row.csv-spec @@ -36,6 +36,24 @@ a:integer // end::multivalue-result[] ; +shadowingInternal +required_capability: unique_names +ROW a = 1, a = 2; + +a:integer + 2 +; + +shadowingInternalSubfields +required_capability: unique_names +// Fun fact: "Sissi" is an actual exoplanet name, after the character from the movie with the same name. A.k.a. HAT-P-14 b. +ROW city.country.continent.planet.name = "Earth", city.country.continent.name = "Netherlands", city.country.continent.planet.name = "Sissi" +; + +city.country.continent.name:keyword | city.country.continent.planet.name:keyword +Netherlands | Sissi +; + unsignedLongLiteral ROW long_max = 9223372036854775807, ul_start = 9223372036854775808, ul_end = 18446744073709551615, double=18446744073709551616; @@ -70,10 +88,11 @@ a:integer | b:integer | c:null | z:integer ; evalRowWithNull2 +required_capability: unique_names row a = 1, null, b = 2, c = null, null | eval z = a+b; -a:integer | null:null | b:integer | c:null | null:null | z:integer -1 | null | 2 | null | null | 3 +a:integer | b:integer | c:null | null:null | z:integer + 1 | 2 | null | null | 3 ; evalRowWithNull3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 596e671679eb3..b64dcf7bf5ca4 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -1886,6 +1886,39 @@ w_avg:double null ; +shadowingInternal +FROM employees +| STATS x = MAX(emp_no), x = MIN(emp_no) +; + +x:integer +10001 +; + +shadowingInternalWithGroup +FROM employees +| STATS x = MAX(emp_no), x = MIN(emp_no) BY x = gender +| SORT x ASC +; + +x:keyword +F +M +null +; + +shadowingTheGroup +FROM employees +| STATS gender = MAX(emp_no), gender = MIN(emp_no) BY gender +| SORT gender ASC +; + +gender:keyword +F +M +null +; + docsStatsMvGroup // tag::mv-group[] ROW i=1, a=["a", "b"] | STATS MIN(i) BY a | SORT a ASC diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index be08d61ab8d6c..98c6d8f4332be 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -152,14 +152,20 @@ public enum Cap { FIX_COUNT_DISTINCT_SOURCE_ERROR, /** - * Use RangeQuery for BinaryComparison on DateTime fields. - * */ + * Use RangeQuery for BinaryComparison on DateTime fields. + */ RANGEQUERY_FOR_DATETIME, /** * Add tests for #105383, STATS BY constant. */ - STATS_BY_CONSTANT; + STATS_BY_CONSTANT, + + /** + * Fix for non-unique attribute names in ROW and logical plans. + * https://github.com/elastic/elasticsearch/issues/110541 + */ + UNIQUE_NAMES; private final boolean snapshotOnly; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index cc12d3730f495..a691f88f29f99 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -1068,13 +1068,29 @@ public static Expression castStringLiteral(Expression from, DataType target) { * Any fields which could not be resolved by conversion functions will be converted to UnresolvedAttribute instances in a later rule * (See UnresolveUnionTypes below). */ - private static class ResolveUnionTypes extends BaseAnalyzerRule { + private static class ResolveUnionTypes extends Rule { record TypeResolutionKey(String fieldName, DataType fieldType) {} + private List unionFieldAttributes; + @Override - protected LogicalPlan doRule(LogicalPlan plan) { - List unionFieldAttributes = new ArrayList<>(); + public LogicalPlan apply(LogicalPlan plan) { + unionFieldAttributes = new ArrayList<>(); + // Collect field attributes from previous runs + plan.forEachUp(EsRelation.class, rel -> { + for (Attribute attr : rel.output()) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField) { + unionFieldAttributes.add(fa); + } + } + }); + + return plan.transformUp(LogicalPlan.class, p -> p.resolved() || p.childrenResolved() == false ? p : doRule(p)); + } + + private LogicalPlan doRule(LogicalPlan plan) { + int alreadyAddedUnionFieldAttributes = unionFieldAttributes.size(); // See if the eval function has an unresolved MultiTypeEsField field // Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge) plan = plan.transformExpressionsOnly( @@ -1082,7 +1098,7 @@ protected LogicalPlan doRule(LogicalPlan plan) { convert -> resolveConvertFunction(convert, unionFieldAttributes) ); // If no union fields were generated, return the plan as is - if (unionFieldAttributes.isEmpty()) { + if (unionFieldAttributes.size() == alreadyAddedUnionFieldAttributes) { return plan; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java index 3314129fae405..242c947e56de9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/AnalyzerRules.java @@ -20,8 +20,6 @@ import java.util.function.Predicate; import java.util.function.Supplier; -import static java.util.Collections.singletonList; - public final class AnalyzerRules { public abstract static class AnalyzerRule extends Rule { @@ -138,14 +136,6 @@ public static List maybeResolveAgainstList( ) .toList(); - return singletonList( - ua.withUnresolvedMessage( - "Reference [" - + ua.qualifiedName() - + "] is ambiguous (to disambiguate use quotes or qualifiers); " - + "matches any of " - + refs - ) - ); + throw new IllegalStateException("Reference [" + ua.qualifiedName() + "] is ambiguous; " + "matches any of " + refs); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java index bff76fb1a706e..d9141d737c949 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/OptimizerRules.java @@ -8,8 +8,10 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.xpack.esql.common.Failures; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.Expressions; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.plan.QueryPlan; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Enrich; @@ -36,6 +38,9 @@ import org.elasticsearch.xpack.esql.plan.physical.RowExec; import org.elasticsearch.xpack.esql.plan.physical.ShowExec; +import java.util.HashSet; +import java.util.Set; + import static org.elasticsearch.xpack.esql.common.Failure.fail; class OptimizerRules { @@ -49,9 +54,24 @@ void checkPlan(P p, Failures failures) { AttributeSet input = p.inputSet(); AttributeSet generated = generates(p); AttributeSet missing = refs.subtract(input).subtract(generated); - if (missing.size() > 0) { + if (missing.isEmpty() == false) { failures.add(fail(p, "Plan [{}] optimized incorrectly due to missing references {}", p.nodeString(), missing)); } + + Set outputAttributeNames = new HashSet<>(); + Set outputAttributeIds = new HashSet<>(); + for (Attribute outputAttr : p.output()) { + if (outputAttributeNames.add(outputAttr.name()) == false || outputAttributeIds.add(outputAttr.id()) == false) { + failures.add( + fail( + p, + "Plan [{}] optimized incorrectly due to duplicate output attribute {}", + p.nodeString(), + outputAttr.toString() + ) + ); + } + } } protected AttributeSet references(P p) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java index f2603eedf8b84..586b18002562d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java @@ -73,6 +73,7 @@ import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.source; import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.typedParsing; import static org.elasticsearch.xpack.esql.core.parser.ParserUtils.visitList; +import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputExpressions; import static org.elasticsearch.xpack.esql.plan.logical.Enrich.Mode; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToInt; @@ -234,8 +235,9 @@ public Map visitCommandOptions(EsqlBaseParser.CommandOptionsCont } @Override + @SuppressWarnings("unchecked") public LogicalPlan visitRowCommand(EsqlBaseParser.RowCommandContext ctx) { - return new Row(source(ctx), visitFields(ctx.fields())); + return new Row(source(ctx), (List) (List) mergeOutputExpressions(visitFields(ctx.fields()), List.of())); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java index 5e4b45d7127fe..f0b38d281474e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Rename.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.plan.logical; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; @@ -28,6 +29,12 @@ public List renamings() { return renamings; } + @Override + public List output() { + // Rename is mapped to a Project during analysis; we do not compute the output here. + throw new IllegalStateException("Should never reach here."); + } + @Override public boolean expressionsResolved() { for (var alias : renamings) {