diff --git a/docs/changelog/107545.yaml b/docs/changelog/107545.yaml new file mode 100644 index 0000000000000..ad457cc5a533f --- /dev/null +++ b/docs/changelog/107545.yaml @@ -0,0 +1,6 @@ +pr: 107545 +summary: "ESQL: Union Types Support" +area: ES|QL +type: enhancement +issues: + - 100603 diff --git a/muted-tests.yml b/muted-tests.yml index ef3c8188498a9..aef4b526e8b52 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -103,3 +103,10 @@ tests: # - class: org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIPTests # method: testCrankyEvaluateBlockWithoutNulls # issue: https://github.com/elastic/elasticsearch/... +# +# Mute a single test in an ES|QL csv-spec test file: +# - class: "org.elasticsearch.xpack.esql.CsvTests" +# method: "test {union_types.MultiIndexIpStringStatsInline}" +# issue: "https://github.com/elastic/elasticsearch/..." +# Note that this mutes for the unit-test-like CsvTests only. +# Muting for the integration tests needs to be done for each IT class individually. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java index a91f005d6d5ab..42feda3e9dd48 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java @@ -92,6 +92,16 @@ interface StoredFields { */ SortedSetDocValues ordinals(LeafReaderContext context) throws IOException; + /** + * In support of 'Union Types', we sometimes desire that Blocks loaded from source are immediately + * converted in some way. Typically, this would be a type conversion, or an encoding conversion. + * @param block original block loaded from source + * @return converted block (or original if no conversion required) + */ + default Block convert(Block block) { + return block; + } + /** * Load blocks with only null. */ diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java index a6e713007a97f..0f7d92564c8ab 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java @@ -168,12 +168,14 @@ protected Attribute clone( @Override public int hashCode() { - return Objects.hash(super.hashCode(), path); + return Objects.hash(super.hashCode(), path, field); } @Override public boolean equals(Object obj) { - return super.equals(obj) && Objects.equals(path, ((FieldAttribute) obj).path); + return super.equals(obj) + && Objects.equals(path, ((FieldAttribute) obj).path) + && Objects.equals(field, ((FieldAttribute) obj).field); } @Override diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java index fd7bfbec4730f..9b088cfb19f6c 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java @@ -15,11 +15,15 @@ import java.io.IOException; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.TreeMap; /** * Representation of field mapped differently across indices. 
* Used during mapping discovery only. + * Note that the field typesToIndices is not serialized because that information is + * not needed across the cluster; it survives only through the analysis phase of query planning. + * It is used specifically for the 'union types' feature in ES|QL. */ public class InvalidMappedField extends EsField { static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( ); private final String errorMessage; + private final Map<String, Set<String>> typesToIndices; public InvalidMappedField(String name, String errorMessage, Map<String, EsField> properties) { - super(name, DataType.UNSUPPORTED, properties, false); - this.errorMessage = errorMessage; + this(name, errorMessage, properties, Map.of()); } public InvalidMappedField(String name, String errorMessage) { @@ -43,6 +47,19 @@ public InvalidMappedField(String name) { this(name, StringUtils.EMPTY, new TreeMap<>()); } + /** + * Constructor supporting union types, used in ES|QL. + */ + public InvalidMappedField(String name, Map<String, Set<String>> typesToIndices) { + this(name, makeErrorMessage(typesToIndices), new TreeMap<>(), typesToIndices); + } + + private InvalidMappedField(String name, String errorMessage, Map<String, EsField> properties, Map<String, Set<String>> typesToIndices) { + super(name, DataType.UNSUPPORTED, properties, false); + this.errorMessage = errorMessage; + this.typesToIndices = typesToIndices; + } + private InvalidMappedField(StreamInput in) throws IOException { this(in.readString(), in.readString(), in.readImmutableMap(StreamInput::readString, i -> i.readNamedWriteable(EsField.class))); } @@ -88,4 +105,28 @@ public EsField getExactField() { public Exact getExactInfo() { return new Exact(false, "Field [" + getName() + "] is invalid, cannot access it"); } + + public Map<String, Set<String>> getTypesToIndices() { + return typesToIndices; + } + + private static String makeErrorMessage(Map<String, Set<String>> typesToIndices) { + StringBuilder errorMessage = new StringBuilder(); + errorMessage.append("mapped as ["); + errorMessage.append(typesToIndices.size()); + errorMessage.append("] incompatible types: "); + boolean first = true; + for (Map.Entry<String, Set<String>> e : typesToIndices.entrySet()) { + if (first) { + first = false; + } else { + errorMessage.append(", "); + } + errorMessage.append("["); + errorMessage.append(e.getKey()); + errorMessage.append("] in "); + errorMessage.append(e.getValue()); + } + return errorMessage.toString(); + } }
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java index 4d41ab27312c3..2e46735bd5bd1 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java @@ -83,7 +83,9 @@ private Page(boolean copyBlocks, int positionCount, Block[] blocks) { private Page(Page prev, Block[] toAdd) { for (Block block : toAdd) { if (prev.positionCount != block.getPositionCount()) { - throw new IllegalArgumentException("Block [" + block + "] does not have same position count"); + throw new IllegalArgumentException( + "Block [" + block + "] does not have same position count: " + block.getPositionCount() + " != " + prev.positionCount + ); } } this.positionCount = prev.positionCount;
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java
b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java index 06b1375ac057e..ee747d98c26f8 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java @@ -165,6 +165,7 @@ public int get(int i) { } } success = true; + return page.appendBlocks(blocks); } catch (IOException e) { throw new UncheckedIOException(e); } finally { @@ -172,7 +173,6 @@ public int get(int i) { Releasables.closeExpectNoException(blocks); } } - return page.appendBlocks(blocks); } private void positionFieldWork(int shard, int segment, int firstDoc) { @@ -233,6 +233,7 @@ private void loadFromSingleLeaf(Block[] blocks, int shard, int segment, BlockLoa new RowStrideReaderWork( field.rowStride(ctx), (Block.Builder) field.loader.builder(loaderBlockFactory, docs.count()), + field.loader, f ) ); @@ -262,17 +263,13 @@ private void loadFromSingleLeaf(Block[] blocks, int shard, int segment, BlockLoa ); for (int p = 0; p < docs.count(); p++) { int doc = docs.get(p); - if (storedFields != null) { - storedFields.advanceTo(doc); - } - for (int r = 0; r < rowStrideReaders.size(); r++) { - RowStrideReaderWork work = rowStrideReaders.get(r); - work.reader.read(doc, storedFields, work.builder); + storedFields.advanceTo(doc); + for (RowStrideReaderWork work : rowStrideReaders) { + work.read(doc, storedFields); } } - for (int r = 0; r < rowStrideReaders.size(); r++) { - RowStrideReaderWork work = rowStrideReaders.get(r); - blocks[work.offset] = work.builder.build(); + for (RowStrideReaderWork work : rowStrideReaders) { + blocks[work.offset] = work.build(); } } finally { Releasables.close(rowStrideReaders); @@ -310,7 +307,9 @@ private class LoadFromMany implements Releasable { private final IntVector docs; private final int[] forwards; private final int[] backwards; - private final Block.Builder[] builders; + private final Block.Builder[][] builders; + private final BlockLoader[][] converters; + private final Block.Builder[] fieldTypeBuilders; private final BlockLoader.RowStrideReader[] rowStride; BlockLoaderStoredFieldsFromLeafLoader storedFields; @@ -322,21 +321,25 @@ private class LoadFromMany implements Releasable { docs = docVector.docs(); forwards = docVector.shardSegmentDocMapForwards(); backwards = docVector.shardSegmentDocMapBackwards(); - builders = new Block.Builder[target.length]; + fieldTypeBuilders = new Block.Builder[target.length]; + builders = new Block.Builder[target.length][shardContexts.size()]; + converters = new BlockLoader[target.length][shardContexts.size()]; rowStride = new BlockLoader.RowStrideReader[target.length]; } void run() throws IOException { for (int f = 0; f < fields.length; f++) { /* - * Important note: each block loader has a method to build an - * optimized block loader, but we have *many* fields and some - * of those block loaders may not be compatible with each other. - * So! We take the least common denominator which is the loader - * from the element expected element type. + * Important note: each field has a desired type, which might not match the mapped type (in the case of union-types). 
+ * We create the final block builders using the desired type, one for each field, but then also use inner builders + * (one for each field and shard), and converters (again one for each field and shard) to actually perform the field + * loading in a way that is correct for the mapped field type, and then convert between that type and the desired type. */ - builders[f] = fields[f].info.type.newBlockBuilder(docs.getPositionCount(), blockFactory); + fieldTypeBuilders[f] = fields[f].info.type.newBlockBuilder(docs.getPositionCount(), blockFactory); + builders[f] = new Block.Builder[shardContexts.size()]; + converters[f] = new BlockLoader[shardContexts.size()]; } + ComputeBlockLoaderFactory loaderBlockFactory = new ComputeBlockLoaderFactory(blockFactory, docs.getPositionCount()); int p = forwards[0]; int shard = shards.getInt(p); int segment = segments.getInt(p); @@ -344,7 +347,8 @@ void run() throws IOException { positionFieldWork(shard, segment, firstDoc); LeafReaderContext ctx = ctx(shard, segment); fieldsMoved(ctx, shard); - read(firstDoc); + verifyBuilders(loaderBlockFactory, shard); + read(firstDoc, shard); for (int i = 1; i < forwards.length; i++) { p = forwards[i]; shard = shards.getInt(p); @@ -354,11 +358,19 @@ void run() throws IOException { ctx = ctx(shard, segment); fieldsMoved(ctx, shard); } - read(docs.getInt(p)); + verifyBuilders(loaderBlockFactory, shard); + read(docs.getInt(p), shard); } - for (int f = 0; f < builders.length; f++) { - try (Block orig = builders[f].build()) { - target[f] = orig.filter(backwards); + for (int f = 0; f < target.length; f++) { + for (int s = 0; s < shardContexts.size(); s++) { + if (builders[f][s] != null) { + try (Block orig = (Block) converters[f][s].convert(builders[f][s].build())) { + fieldTypeBuilders[f].copyFrom(orig, 0, orig.getPositionCount()); + } + } + } + try (Block targetBlock = fieldTypeBuilders[f].build()) { + target[f] = targetBlock.filter(backwards); } } } @@ -379,16 +391,29 @@ private void fieldsMoved(LeafReaderContext ctx, int shard) throws IOException { } } - private void read(int doc) throws IOException { + private void verifyBuilders(ComputeBlockLoaderFactory loaderBlockFactory, int shard) { + for (int f = 0; f < fields.length; f++) { + if (builders[f][shard] == null) { + // Note that this relies on field.newShard() to set the loader and converter correctly for the current shard + builders[f][shard] = (Block.Builder) fields[f].loader.builder(loaderBlockFactory, docs.getPositionCount()); + converters[f][shard] = fields[f].loader; + } + } + } + + private void read(int doc, int shard) throws IOException { storedFields.advanceTo(doc); for (int f = 0; f < builders.length; f++) { - rowStride[f].read(doc, storedFields, builders[f]); + rowStride[f].read(doc, storedFields, builders[f][shard]); } } @Override public void close() { - Releasables.closeExpectNoException(builders); + Releasables.closeExpectNoException(fieldTypeBuilders); + for (int f = 0; f < fields.length; f++) { + Releasables.closeExpectNoException(builders[f]); + } } } @@ -468,7 +493,17 @@ private void trackReader(String type, BlockLoader.Reader reader) { } } - private record RowStrideReaderWork(BlockLoader.RowStrideReader reader, Block.Builder builder, int offset) implements Releasable { + private record RowStrideReaderWork(BlockLoader.RowStrideReader reader, Block.Builder builder, BlockLoader loader, int offset) + implements + Releasable { + void read(int doc, BlockLoaderStoredFieldsFromLeafLoader storedFields) throws IOException { + reader.read(doc, storedFields, 
builder); + } + + Block build() { + return (Block) loader.convert(builder.build()); + } + @Override public void close() { builder.close(); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java new file mode 100644 index 0000000000000..66bcf2a57e393 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java @@ -0,0 +1,2020 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.breaker.NoopCircuitBreaker; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.DoubleVector; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.LongVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.data.TestBlockFactory; +import org.elasticsearch.compute.operator.AnyOperatorTestCase; +import org.elasticsearch.compute.operator.CannedSourceOperator; +import org.elasticsearch.compute.operator.Driver; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.DriverRunner; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Operator; +import org.elasticsearch.compute.operator.PageConsumerOperator; +import org.elasticsearch.compute.operator.SequenceLongBlockSourceOperator; 
+import org.elasticsearch.compute.operator.SourceOperator; +import org.elasticsearch.compute.operator.TestResultPageSinkOperator; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.FieldNamesFieldMapper; +import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperServiceTestCase; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.SourceLoader; +import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.fetch.StoredFieldsSpec; +import org.elasticsearch.search.lookup.SearchLookup; +import org.elasticsearch.threadpool.FixedExecutorBuilder; +import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.util.StringUtils; +import org.hamcrest.Matcher; +import org.junit.After; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.LongStream; + +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; +import static org.elasticsearch.xpack.esql.core.type.DataType.IP; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.oneOf; +import static org.hamcrest.Matchers.sameInstance; + +/** + * These tests are partial duplicates of the tests in ValuesSourceReaderOperatorTests, and focus on testing the behaviour + * of the ValuesSourceReaderOperator, but with a few key differences: + *
+ * <ul>
+ *     <li>Multiple indexes and index mappings are defined and tested</li>
+ *     <li>
+ *         Most primitive types also include a field with prefix 'str_' which is stored and mapped as a string,
+ *         but expected to be extracted and converted directly to the primitive type.
+ *         For example: "str_long": "1" should be read directly into a field named "str_long" of type "long" and value 1.
+ *         This tests the ability of the BlockLoader.convert(Block) method to convert a string to a primitive type.
+ *     </li>
+ *     <li>
+ *         Each index has a few additional custom fields that are stored as specific types, but should be converted to strings by the
+ *         BlockLoader.convert(Block) method. These fields are:
+ *         <ul>
+ *             <li>ip: stored as an IP type, but should be converted to a string</li>
+ *             <li>duration: stored as a long type, but should be converted to a string</li>
+ *         </ul>
+ *         One index stores them as IP and long types, and the other as keyword types, so we test the behaviour of the
+ *         'union types' capabilities of the ValuesSourceReaderOperator class.
+ *     </li>
+ * </ul>
+ * Since this test does not have access to the type conversion code in the ESQL module, we have mocks for that behaviour + * in the inner classes TestTypeConvertingBlockLoader and TestBlockConverter. + */ +@SuppressWarnings("resource") +public class ValueSourceReaderTypeConversionTests extends AnyOperatorTestCase { + private static final String[] PREFIX = new String[] { "a", "b", "c" }; + private static final Map INDICES = new LinkedHashMap<>(); + static { + addIndex( + Map.of( + "ip", + new TestFieldType<>("ip", IP, d -> "192.169.0." + d % 256, Checks::unionIPsAsStrings), + "duration", + new TestFieldType<>("duration", DataType.LONG, d -> (long) d, Checks::unionDurationsAsStrings) + ) + ); + addIndex( + Map.of( + "ip", + new TestFieldType<>("ip", DataType.KEYWORD, d -> "192.169.0." + d % 256, Checks::unionIPsAsStrings), + "duration", + new TestFieldType<>("duration", DataType.KEYWORD, d -> Integer.toString(d), Checks::unionDurationsAsStrings) + ) + ); + } + + static void addIndex(Map> fieldTypes) { + String indexKey = "index" + (INDICES.size() + 1); + INDICES.put(indexKey, new TestIndexMappingConfig(indexKey, INDICES.size(), fieldTypes)); + } + + private record TestIndexMappingConfig(String indexName, int shardIdx, Map> fieldTypes) {} + + private record TestFieldType(String name, DataType dataType, Function valueGenerator, CheckResults checkResults) {} + + private final Map directories = new HashMap<>(); + private final Map mapperServices = new HashMap<>(); + private final Map readers = new HashMap<>(); + private static final Map> keyToTags = new HashMap<>(); + + @After + public void closeIndex() throws IOException { + IOUtils.close(readers.values()); + IOUtils.close(directories.values()); + } + + private Directory directory(String indexKey) { + return directories.computeIfAbsent(indexKey, k -> newDirectory()); + } + + private MapperService mapperService(String indexKey) { + return mapperServices.get(indexKey); + } + + private List initShardContexts() { + return INDICES.keySet() + .stream() + .map(index -> new ValuesSourceReaderOperator.ShardContext(reader(index), () -> SourceLoader.FROM_STORED_SOURCE)) + .toList(); + } + + private IndexReader reader(String indexKey) { + if (readers.get(indexKey) == null) { + try { + initIndex(indexKey, 100, 10); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return readers.get(indexKey); + } + + @Override + protected Operator.OperatorFactory simple() { + return factory(initShardContexts(), mapperService("index1").fieldType("long"), ElementType.LONG); + } + + public static Operator.OperatorFactory factory( + List shardContexts, + MappedFieldType ft, + ElementType elementType + ) { + return factory(shardContexts, ft.name(), elementType, ft.blockLoader(null)); + } + + private static Operator.OperatorFactory factory( + List shardContexts, + String name, + ElementType elementType, + BlockLoader loader + ) { + return new ValuesSourceReaderOperator.Factory(List.of(new ValuesSourceReaderOperator.FieldInfo(name, elementType, shardIdx -> { + if (shardIdx < 0 || shardIdx >= INDICES.size()) { + fail("unexpected shardIdx [" + shardIdx + "]"); + } + return loader; + })), shardContexts, 0); + } + + protected SourceOperator simpleInput(DriverContext context, int size) { + return simpleInput(context, size, commitEvery(size), randomPageSize()); + } + + private int commitEvery(int numDocs) { + return Math.max(1, (int) Math.ceil((double) numDocs / 10)); + } + + private SourceOperator simpleInput(DriverContext context, int size, int commitEvery, int 
pageSize) { + List shardContexts = new ArrayList<>(); + try { + for (String indexKey : INDICES.keySet()) { + initIndex(indexKey, size, commitEvery); + shardContexts.add(new LuceneSourceOperatorTests.MockShardContext(reader(indexKey), INDICES.get(indexKey).shardIdx)); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + var luceneFactory = new LuceneSourceOperator.Factory( + shardContexts, + ctx -> new MatchAllDocsQuery(), + DataPartitioning.SHARD, + 1,// randomIntBetween(1, 10), + pageSize, + LuceneOperator.NO_LIMIT + ); + return luceneFactory.get(context); + } + + private void initMapping(String indexKey) throws IOException { + TestIndexMappingConfig indexMappingConfig = INDICES.get(indexKey); + mapperServices.put(indexKey, new MapperServiceTestCase() { + }.createMapperService(MapperServiceTestCase.mapping(b -> { + fieldExamples(b, "key", "integer"); // unique key per-index to use for looking up test values to compare to + fieldExamples(b, "indexKey", "keyword"); // index name (can be used to choose index-specific test values) + fieldExamples(b, "int", "integer"); + fieldExamples(b, "short", "short"); + fieldExamples(b, "byte", "byte"); + fieldExamples(b, "long", "long"); + fieldExamples(b, "double", "double"); + fieldExamples(b, "kwd", "keyword"); + b.startObject("stored_kwd").field("type", "keyword").field("store", true).endObject(); + b.startObject("mv_stored_kwd").field("type", "keyword").field("store", true).endObject(); + + simpleField(b, "missing_text", "text"); + + for (Map.Entry> entry : indexMappingConfig.fieldTypes.entrySet()) { + String fieldName = entry.getKey(); + TestFieldType fieldType = entry.getValue(); + simpleField(b, fieldName, fieldType.dataType.typeName()); + } + }))); + } + + private void initIndex(String indexKey, int size, int commitEvery) throws IOException { + initMapping(indexKey); + readers.put(indexKey, initIndex(indexKey, directory(indexKey), size, commitEvery)); + } + + private IndexReader initIndex(String indexKey, Directory directory, int size, int commitEvery) throws IOException { + keyToTags.computeIfAbsent(indexKey, k -> new HashMap<>()).clear(); + TestIndexMappingConfig indexMappingConfig = INDICES.get(indexKey); + try ( + IndexWriter writer = new IndexWriter( + directory, + newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH) + ) + ) { + for (int d = 0; d < size; d++) { + XContentBuilder source = JsonXContent.contentBuilder(); + source.startObject(); + source.field("key", d); // documents in this index have a unique key, from which most other values can be derived + source.field("indexKey", indexKey); // all documents in this index have the same indexKey + + source.field("long", d); + source.field("str_long", Long.toString(d)); + source.startArray("mv_long"); + for (int v = 0; v <= d % 3; v++) { + source.value(-1_000L * d + v); + } + source.endArray(); + source.field("source_long", (long) d); + source.startArray("mv_source_long"); + for (int v = 0; v <= d % 3; v++) { + source.value(-1_000L * d + v); + } + source.endArray(); + + source.field("int", d); + source.field("str_int", Integer.toString(d)); + source.startArray("mv_int"); + for (int v = 0; v <= d % 3; v++) { + source.value(1_000 * d + v); + } + source.endArray(); + source.field("source_int", d); + source.startArray("mv_source_int"); + for (int v = 0; v <= d % 3; v++) { + source.value(1_000 * d + v); + } + source.endArray(); + + source.field("short", (short) d); + source.field("str_short", 
Short.toString((short) d)); + source.startArray("mv_short"); + for (int v = 0; v <= d % 3; v++) { + source.value((short) (2_000 * d + v)); + } + source.endArray(); + source.field("source_short", (short) d); + source.startArray("mv_source_short"); + for (int v = 0; v <= d % 3; v++) { + source.value((short) (2_000 * d + v)); + } + source.endArray(); + + source.field("byte", (byte) d); + source.field("str_byte", Byte.toString((byte) d)); + source.startArray("mv_byte"); + for (int v = 0; v <= d % 3; v++) { + source.value((byte) (3_000 * d + v)); + } + source.endArray(); + source.field("source_byte", (byte) d); + source.startArray("mv_source_byte"); + for (int v = 0; v <= d % 3; v++) { + source.value((byte) (3_000 * d + v)); + } + source.endArray(); + + source.field("double", d / 123_456d); + source.field("str_double", Double.toString(d / 123_456d)); + source.startArray("mv_double"); + for (int v = 0; v <= d % 3; v++) { + source.value(d / 123_456d + v); + } + source.endArray(); + source.field("source_double", d / 123_456d); + source.startArray("mv_source_double"); + for (int v = 0; v <= d % 3; v++) { + source.value(d / 123_456d + v); + } + source.endArray(); + + String tag = keyToTags.get(indexKey).computeIfAbsent(d, k -> "tag-" + randomIntBetween(1, 5)); + source.field("kwd", tag); + source.field("str_kwd", tag); + source.startArray("mv_kwd"); + for (int v = 0; v <= d % 3; v++) { + source.value(PREFIX[v] + d); + } + source.endArray(); + source.field("stored_kwd", Integer.toString(d)); + source.startArray("mv_stored_kwd"); + for (int v = 0; v <= d % 3; v++) { + source.value(PREFIX[v] + d); + } + source.endArray(); + source.field("source_kwd", Integer.toString(d)); + source.startArray("mv_source_kwd"); + for (int v = 0; v <= d % 3; v++) { + source.value(PREFIX[v] + d); + } + source.endArray(); + + source.field("text", Integer.toString(d)); + source.startArray("mv_text"); + for (int v = 0; v <= d % 3; v++) { + source.value(PREFIX[v] + d); + } + source.endArray(); + + for (Map.Entry> entry : indexMappingConfig.fieldTypes.entrySet()) { + String fieldName = entry.getKey(); + TestFieldType fieldType = entry.getValue(); + source.field(fieldName, fieldType.valueGenerator.apply(d)); + } + + source.endObject(); + + ParsedDocument doc = mapperService(indexKey).documentParser() + .parseDocument( + new SourceToParse("id" + d, BytesReference.bytes(source), XContentType.JSON), + mapperService(indexKey).mappingLookup() + ); + writer.addDocuments(doc.docs()); + + if (d % commitEvery == commitEvery - 1) { + writer.commit(); + } + } + } + return DirectoryReader.open(directory); + } + + @Override + protected Matcher expectedDescriptionOfSimple() { + return equalTo("ValuesSourceReaderOperator[fields = [long]]"); + } + + @Override + protected Matcher expectedToStringOfSimple() { + return expectedDescriptionOfSimple(); + } + + public void testLoadAll() { + DriverContext driverContext = driverContext(); + loadSimpleAndAssert( + driverContext, + CannedSourceOperator.collectPages(simpleInput(driverContext, between(100, 5000))), + Block.MvOrdering.SORTED_ASCENDING, + Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING + ); + } + + public void testLoadAllInOnePage() { + DriverContext driverContext = driverContext(); + loadSimpleAndAssert( + driverContext, + List.of(CannedSourceOperator.mergePages(CannedSourceOperator.collectPages(simpleInput(driverContext, between(100, 5000))))), + Block.MvOrdering.UNORDERED, + Block.MvOrdering.UNORDERED + ); + } + + public void testManySingleDocPages() { + String indexKey = "index1"; + 
DriverContext driverContext = driverContext(); + int numDocs = between(10, 100); + List input = CannedSourceOperator.collectPages(simpleInput(driverContext, numDocs, between(1, numDocs), 1)); + Randomness.shuffle(input); + List shardContexts = initShardContexts(); + List operators = new ArrayList<>(); + Checks checks = new Checks(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING, Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING); + FieldCase testCase = new FieldCase( + new KeywordFieldMapper.KeywordFieldType("kwd"), + ElementType.BYTES_REF, + checks::tags, + StatusChecks::keywordsFromDocValues + ); + // TODO: Add index2 + operators.add( + new ValuesSourceReaderOperator.Factory( + List.of(testCase.info, fieldInfo(mapperService(indexKey).fieldType("key"), ElementType.INT)), + shardContexts, + 0 + ).get(driverContext) + ); + List results = drive(operators, input.iterator(), driverContext); + assertThat(results, hasSize(input.size())); + for (Page page : results) { + assertThat(page.getBlockCount(), equalTo(3)); + IntVector keys = page.getBlock(2).asVector(); + for (int p = 0; p < page.getPositionCount(); p++) { + int key = keys.getInt(p); + testCase.checkResults.check(page.getBlock(1), p, key, indexKey); + } + } + } + + public void testEmpty() { + DriverContext driverContext = driverContext(); + loadSimpleAndAssert( + driverContext, + CannedSourceOperator.collectPages(simpleInput(driverContext, 0)), + Block.MvOrdering.UNORDERED, + Block.MvOrdering.UNORDERED + ); + } + + public void testLoadAllInOnePageShuffled() { + DriverContext driverContext = driverContext(); + Page source = CannedSourceOperator.mergePages(CannedSourceOperator.collectPages(simpleInput(driverContext, between(100, 5000)))); + List shuffleList = new ArrayList<>(); + IntStream.range(0, source.getPositionCount()).forEach(shuffleList::add); + Randomness.shuffle(shuffleList); + int[] shuffleArray = shuffleList.stream().mapToInt(Integer::intValue).toArray(); + Block[] shuffledBlocks = new Block[source.getBlockCount()]; + for (int b = 0; b < shuffledBlocks.length; b++) { + shuffledBlocks[b] = source.getBlock(b).filter(shuffleArray); + } + source = new Page(shuffledBlocks); + loadSimpleAndAssert(driverContext, List.of(source), Block.MvOrdering.UNORDERED, Block.MvOrdering.UNORDERED); + } + + private static ValuesSourceReaderOperator.FieldInfo fieldInfo(MappedFieldType ft, ElementType elementType) { + return new ValuesSourceReaderOperator.FieldInfo(ft.name(), elementType, shardIdx -> getBlockLoaderFor(shardIdx, ft, null)); + } + + private static ValuesSourceReaderOperator.FieldInfo fieldInfo(MappedFieldType ft, MappedFieldType ftX, ElementType elementType) { + return new ValuesSourceReaderOperator.FieldInfo(ft.name(), elementType, shardIdx -> getBlockLoaderFor(shardIdx, ft, ftX)); + } + + private ValuesSourceReaderOperator.FieldInfo fieldInfo(String fieldName, ElementType elementType, DataType toType) { + return new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, shardIdx -> getBlockLoaderFor(shardIdx, fieldName, toType)); + } + + private static MappedFieldType.BlockLoaderContext blContext() { + return new MappedFieldType.BlockLoaderContext() { + @Override + public String indexName() { + return "test_index"; + } + + @Override + public MappedFieldType.FieldExtractPreference fieldExtractPreference() { + return MappedFieldType.FieldExtractPreference.NONE; + } + + @Override + public SearchLookup lookup() { + throw new UnsupportedOperationException(); + } + + @Override + public Set sourcePaths(String name) { + return 
Set.of(name); + } + + @Override + public String parentField(String field) { + return null; + } + + @Override + public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() { + return FieldNamesFieldMapper.FieldNamesFieldType.get(true); + } + }; + } + + private void loadSimpleAndAssert( + DriverContext driverContext, + List input, + Block.MvOrdering booleanAndNumericalDocValuesMvOrdering, + Block.MvOrdering bytesRefDocValuesMvOrdering + ) { + List cases = infoAndChecksForEachType(booleanAndNumericalDocValuesMvOrdering, bytesRefDocValuesMvOrdering); + List shardContexts = initShardContexts(); + List operators = new ArrayList<>(); + operators.add( + new ValuesSourceReaderOperator.Factory( + List.of( + fieldInfo(mapperService("index1").fieldType("key"), ElementType.INT), + fieldInfo(mapperService("index1").fieldType("indexKey"), ElementType.BYTES_REF) + ), + shardContexts, + 0 + ).get(driverContext) + ); + List tests = new ArrayList<>(); + while (cases.isEmpty() == false) { + List b = randomNonEmptySubsetOf(cases); + cases.removeAll(b); + tests.addAll(b); + operators.add( + new ValuesSourceReaderOperator.Factory(b.stream().map(i -> i.info).toList(), shardContexts, 0).get(driverContext) + ); + } + List results = drive(operators, input.iterator(), driverContext); + assertThat(results, hasSize(input.size())); + for (Page page : results) { + assertThat(page.getBlockCount(), equalTo(tests.size() + 3 /* one for doc, one for keys and one for indexKey */)); + IntVector keys = page.getBlock(1).asVector(); + BytesRefVector indexKeys = page.getBlock(2).asVector(); + for (int p = 0; p < page.getPositionCount(); p++) { + int key = keys.getInt(p); + String indexKey = indexKeys.getBytesRef(p, new BytesRef()).utf8ToString(); + for (int i = 0; i < tests.size(); i++) { + try { + tests.get(i).checkResults.check(page.getBlock(3 + i), p, key, indexKey); + } catch (AssertionError e) { + throw new AssertionError("error checking " + tests.get(i).info.name() + "[" + p + "]: " + e.getMessage(), e); + } + } + } + } + for (Operator op : operators) { + assertThat(((ValuesSourceReaderOperator) op).status().pagesProcessed(), equalTo(input.size())); + } + assertDriverContext(driverContext); + } + + interface CheckResults { + void check(Block block, int position, int key, String indexKey); + } + + interface CheckReaders { + void check(boolean forcedRowByRow, int pageCount, int segmentCount, Map readersBuilt); + } + + interface CheckReadersWithName { + void check(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map readersBuilt); + } + + record FieldCase(ValuesSourceReaderOperator.FieldInfo info, CheckResults checkResults, CheckReadersWithName checkReaders) { + FieldCase(MappedFieldType ft, ElementType elementType, CheckResults checkResults, CheckReadersWithName checkReaders) { + this(fieldInfo(ft, elementType), checkResults, checkReaders); + } + + FieldCase( + MappedFieldType ft, + MappedFieldType ftX, + ElementType elementType, + CheckResults checkResults, + CheckReadersWithName checkReaders + ) { + this(fieldInfo(ft, ftX, elementType), checkResults, checkReaders); + } + + FieldCase(MappedFieldType ft, ElementType elementType, CheckResults checkResults, CheckReaders checkReaders) { + this( + ft, + elementType, + checkResults, + (name, forcedRowByRow, pageCount, segmentCount, readersBuilt) -> checkReaders.check( + forcedRowByRow, + pageCount, + segmentCount, + readersBuilt + ) + ); + } + } + + /** + * Asserts that {@link ValuesSourceReaderOperator#status} claims that only + * the expected readers are 
built after loading singleton pages. + */ + public void testLoadAllStatus() { + testLoadAllStatus(false); + } + + /** + * Asserts that {@link ValuesSourceReaderOperator#status} claims that only + * the expected readers are built after loading non-singleton pages. + */ + public void testLoadAllStatusAllInOnePage() { + testLoadAllStatus(true); + } + + private void testLoadAllStatus(boolean allInOnePage) { + DriverContext driverContext = driverContext(); + int numDocs = between(100, 5000); + List input = CannedSourceOperator.collectPages(simpleInput(driverContext, numDocs, commitEvery(numDocs), numDocs)); + assertThat(input, hasSize(20)); + List shardContexts = initShardContexts(); + int totalSize = 0; + for (var shardContext : shardContexts) { + assertThat(shardContext.reader().leaves(), hasSize(10)); + totalSize += shardContext.reader().leaves().size(); + } + // Build one operator for each field, so we get a unique map to assert on + List cases = infoAndChecksForEachType( + Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING, + Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING + ); + List operators = cases.stream() + .map(i -> new ValuesSourceReaderOperator.Factory(List.of(i.info), shardContexts, 0).get(driverContext)) + .toList(); + if (allInOnePage) { + input = List.of(CannedSourceOperator.mergePages(input)); + } + drive(operators, input.iterator(), driverContext); + for (int i = 0; i < cases.size(); i++) { + ValuesSourceReaderOperator.Status status = (ValuesSourceReaderOperator.Status) operators.get(i).status(); + assertThat(status.pagesProcessed(), equalTo(input.size())); + FieldCase fc = cases.get(i); + fc.checkReaders.check(fc.info.name(), allInOnePage, input.size(), totalSize, status.readersBuilt()); + } + } + + private List infoAndChecksForEachType( + Block.MvOrdering booleanAndNumericalDocValuesMvOrdering, + Block.MvOrdering bytesRefDocValuesMvOrdering + ) { + MapperService mapperService = mapperService("index1"); // almost fields have identical mapper service + Checks checks = new Checks(booleanAndNumericalDocValuesMvOrdering, bytesRefDocValuesMvOrdering); + List r = new ArrayList<>(); + r.add(new FieldCase(mapperService.fieldType(IdFieldMapper.NAME), ElementType.BYTES_REF, checks::ids, StatusChecks::id)); + r.add(new FieldCase(TsidExtractingIdFieldMapper.INSTANCE.fieldType(), ElementType.BYTES_REF, checks::ids, StatusChecks::id)); + r.add(new FieldCase(mapperService.fieldType("long"), ElementType.LONG, checks::longs, StatusChecks::longsFromDocValues)); + r.add( + new FieldCase( + mapperService.fieldType("str_long"), + mapperService.fieldType("long"), + ElementType.LONG, + checks::longs, + StatusChecks::strFromDocValues + ) + ); + r.add( + new FieldCase( + mapperService.fieldType("mv_long"), + ElementType.LONG, + checks::mvLongsFromDocValues, + StatusChecks::mvLongsFromDocValues + ) + ); + r.add(new FieldCase(mapperService.fieldType("missing_long"), ElementType.LONG, checks::constantNulls, StatusChecks::constantNulls)); + r.add(new FieldCase(mapperService.fieldType("source_long"), ElementType.LONG, checks::longs, StatusChecks::longsFromSource)); + r.add( + new FieldCase( + mapperService.fieldType("mv_source_long"), + ElementType.LONG, + checks::mvLongsUnordered, + StatusChecks::mvLongsFromSource + ) + ); + r.add(new FieldCase(mapperService.fieldType("int"), ElementType.INT, checks::ints, StatusChecks::intsFromDocValues)); + r.add( + new FieldCase( + mapperService.fieldType("str_int"), + mapperService.fieldType("int"), + ElementType.INT, + checks::ints, + 
StatusChecks::strFromDocValues + ) + ); + r.add( + new FieldCase( + mapperService.fieldType("mv_int"), + ElementType.INT, + checks::mvIntsFromDocValues, + StatusChecks::mvIntsFromDocValues + ) + ); + r.add(new FieldCase(mapperService.fieldType("missing_int"), ElementType.INT, checks::constantNulls, StatusChecks::constantNulls)); + r.add(new FieldCase(mapperService.fieldType("source_int"), ElementType.INT, checks::ints, StatusChecks::intsFromSource)); + r.add( + new FieldCase( + mapperService.fieldType("mv_source_int"), + ElementType.INT, + checks::mvIntsUnordered, + StatusChecks::mvIntsFromSource + ) + ); + r.add(new FieldCase(mapperService.fieldType("short"), ElementType.INT, checks::shorts, StatusChecks::shortsFromDocValues)); + r.add( + new FieldCase( + mapperService.fieldType("str_short"), + mapperService.fieldType("short"), + ElementType.INT, + checks::shorts, + StatusChecks::strFromDocValues + ) + ); + r.add(new FieldCase(mapperService.fieldType("mv_short"), ElementType.INT, checks::mvShorts, StatusChecks::mvShortsFromDocValues)); + r.add(new FieldCase(mapperService.fieldType("missing_short"), ElementType.INT, checks::constantNulls, StatusChecks::constantNulls)); + r.add(new FieldCase(mapperService.fieldType("byte"), ElementType.INT, checks::bytes, StatusChecks::bytesFromDocValues)); + // r.add(new FieldCase(mapperService.fieldType("str_byte"), ElementType.INT, checks::bytes, StatusChecks::bytesFromDocValues)); + r.add(new FieldCase(mapperService.fieldType("mv_byte"), ElementType.INT, checks::mvBytes, StatusChecks::mvBytesFromDocValues)); + r.add(new FieldCase(mapperService.fieldType("missing_byte"), ElementType.INT, checks::constantNulls, StatusChecks::constantNulls)); + r.add(new FieldCase(mapperService.fieldType("double"), ElementType.DOUBLE, checks::doubles, StatusChecks::doublesFromDocValues)); + r.add( + new FieldCase( + mapperService.fieldType("str_double"), + mapperService.fieldType("double"), + ElementType.DOUBLE, + checks::doubles, + StatusChecks::strFromDocValues + ) + ); + r.add( + new FieldCase(mapperService.fieldType("mv_double"), ElementType.DOUBLE, checks::mvDoubles, StatusChecks::mvDoublesFromDocValues) + ); + r.add( + new FieldCase(mapperService.fieldType("missing_double"), ElementType.DOUBLE, checks::constantNulls, StatusChecks::constantNulls) + ); + r.add(new FieldCase(mapperService.fieldType("kwd"), ElementType.BYTES_REF, checks::tags, StatusChecks::keywordsFromDocValues)); + r.add( + new FieldCase( + mapperService.fieldType("mv_kwd"), + ElementType.BYTES_REF, + checks::mvStringsFromDocValues, + StatusChecks::mvKeywordsFromDocValues + ) + ); + r.add( + new FieldCase(mapperService.fieldType("missing_kwd"), ElementType.BYTES_REF, checks::constantNulls, StatusChecks::constantNulls) + ); + r.add(new FieldCase(storedKeywordField("stored_kwd"), ElementType.BYTES_REF, checks::strings, StatusChecks::keywordsFromStored)); + r.add( + new FieldCase( + storedKeywordField("mv_stored_kwd"), + ElementType.BYTES_REF, + checks::mvStringsUnordered, + StatusChecks::mvKeywordsFromStored + ) + ); + r.add( + new FieldCase(mapperService.fieldType("source_kwd"), ElementType.BYTES_REF, checks::strings, StatusChecks::keywordsFromSource) + ); + r.add( + new FieldCase( + mapperService.fieldType("mv_source_kwd"), + ElementType.BYTES_REF, + checks::mvStringsUnordered, + StatusChecks::mvKeywordsFromSource + ) + ); + r.add( + new FieldCase( + new ValuesSourceReaderOperator.FieldInfo( + "constant_bytes", + ElementType.BYTES_REF, + shardIdx -> BlockLoader.constantBytes(new BytesRef("foo")) + ), + 
checks::constantBytes, + StatusChecks::constantBytes + ) + ); + r.add( + new FieldCase( + new ValuesSourceReaderOperator.FieldInfo("null", ElementType.NULL, shardIdx -> BlockLoader.CONSTANT_NULLS), + checks::constantNulls, + StatusChecks::constantNulls + ) + ); + + // We only care about the field name at this point, so we can use any index mapper here + TestIndexMappingConfig indexMappingConfig = INDICES.get("index1"); + for (TestFieldType fieldType : indexMappingConfig.fieldTypes.values()) { + r.add( + new FieldCase( + fieldInfo(fieldType.name, ElementType.BYTES_REF, DataType.KEYWORD), + fieldType.checkResults, + StatusChecks::unionFromDocValues + ) + ); + } + Collections.shuffle(r, random()); + return r; + } + + record Checks(Block.MvOrdering booleanAndNumericalDocValuesMvOrdering, Block.MvOrdering bytesRefDocValuesMvOrdering) { + void longs(Block block, int position, int key, String indexKey) { + LongVector longs = ((LongBlock) block).asVector(); + assertThat(longs.getLong(position), equalTo((long) key)); + } + + void ints(Block block, int position, int key, String indexKey) { + IntVector ints = ((IntBlock) block).asVector(); + assertThat(ints.getInt(position), equalTo(key)); + } + + void shorts(Block block, int position, int key, String indexKey) { + IntVector ints = ((IntBlock) block).asVector(); + assertThat(ints.getInt(position), equalTo((int) (short) key)); + } + + void bytes(Block block, int position, int key, String indexKey) { + IntVector ints = ((IntBlock) block).asVector(); + assertThat(ints.getInt(position), equalTo((int) (byte) key)); + } + + void doubles(Block block, int position, int key, String indexKey) { + DoubleVector doubles = ((DoubleBlock) block).asVector(); + assertThat(doubles.getDouble(position), equalTo(key / 123_456d)); + } + + void strings(Block block, int position, int key, String indexKey) { + BytesRefVector keywords = ((BytesRefBlock) block).asVector(); + assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo(Integer.toString(key))); + } + + static void unionIPsAsStrings(Block block, int position, int key, String indexKey) { + BytesRefVector keywords = ((BytesRefBlock) block).asVector(); + BytesRef bytesRef = keywords.getBytesRef(position, new BytesRef()); + TestIndexMappingConfig mappingConfig = INDICES.get(indexKey); + TestFieldType fieldType = mappingConfig.fieldTypes.get("ip"); + String expected = fieldType.valueGenerator.apply(key).toString(); + // Conversion should already be done in FieldInfo! + // BytesRef found = (fieldType.dataType.typeName().equals("ip")) ? 
new BytesRef(DocValueFormat.IP.format(bytesRef)) : bytesRef; + assertThat(bytesRef.utf8ToString(), equalTo(expected)); + } + + static void unionDurationsAsStrings(Block block, int position, int key, String indexKey) { + BytesRefVector keywords = ((BytesRefBlock) block).asVector(); + BytesRef bytesRef = keywords.getBytesRef(position, new BytesRef()); + TestIndexMappingConfig mappingConfig = INDICES.get(indexKey); + TestFieldType fieldType = mappingConfig.fieldTypes.get("duration"); + String expected = fieldType.valueGenerator.apply(key).toString(); + assertThat(bytesRef.utf8ToString(), equalTo(expected)); + } + + void tags(Block block, int position, int key, String indexKey) { + BytesRefVector keywords = ((BytesRefBlock) block).asVector(); + Object[] validTags = INDICES.keySet().stream().map(keyToTags::get).map(t -> t.get(key)).toArray(); + assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), oneOf(validTags)); + } + + void ids(Block block, int position, int key, String indexKey) { + BytesRefVector ids = ((BytesRefBlock) block).asVector(); + assertThat(ids.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo("id" + key)); + } + + void constantBytes(Block block, int position, int key, String indexKey) { + BytesRefVector keywords = ((BytesRefBlock) block).asVector(); + assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo("foo")); + } + + void constantNulls(Block block, int position, int key, String indexKey) { + assertTrue(block.areAllValuesNull()); + assertTrue(block.isNull(position)); + } + + void mvLongsFromDocValues(Block block, int position, int key, String indexKey) { + mvLongs(block, position, key, booleanAndNumericalDocValuesMvOrdering); + } + + void mvLongsUnordered(Block block, int position, int key, String indexKey) { + mvLongs(block, position, key, Block.MvOrdering.UNORDERED); + } + + private void mvLongs(Block block, int position, int key, Block.MvOrdering expectedMv) { + LongBlock longs = (LongBlock) block; + assertThat(longs.getValueCount(position), equalTo(key % 3 + 1)); + int offset = longs.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(longs.getLong(offset + v), equalTo(-1_000L * key + v)); + } + if (key % 3 > 0) { + assertThat(longs.mvOrdering(), equalTo(expectedMv)); + } + } + + void mvIntsFromDocValues(Block block, int position, int key, String indexKey) { + mvInts(block, position, key, booleanAndNumericalDocValuesMvOrdering); + } + + void mvIntsUnordered(Block block, int position, int key, String indexKey) { + mvInts(block, position, key, Block.MvOrdering.UNORDERED); + } + + private void mvInts(Block block, int position, int key, Block.MvOrdering expectedMv) { + IntBlock ints = (IntBlock) block; + assertThat(ints.getValueCount(position), equalTo(key % 3 + 1)); + int offset = ints.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(ints.getInt(offset + v), equalTo(1_000 * key + v)); + } + if (key % 3 > 0) { + assertThat(ints.mvOrdering(), equalTo(expectedMv)); + } + } + + void mvShorts(Block block, int position, int key, String indexKey) { + IntBlock ints = (IntBlock) block; + assertThat(ints.getValueCount(position), equalTo(key % 3 + 1)); + int offset = ints.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(ints.getInt(offset + v), equalTo((int) (short) (2_000 * key + v))); + } + if (key % 3 > 0) { + assertThat(ints.mvOrdering(), equalTo(booleanAndNumericalDocValuesMvOrdering)); + } + } + + void mvBytes(Block block, 
int position, int key, String indexKey) { + IntBlock ints = (IntBlock) block; + assertThat(ints.getValueCount(position), equalTo(key % 3 + 1)); + int offset = ints.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(ints.getInt(offset + v), equalTo((int) (byte) (3_000 * key + v))); + } + if (key % 3 > 0) { + assertThat(ints.mvOrdering(), equalTo(booleanAndNumericalDocValuesMvOrdering)); + } + } + + void mvDoubles(Block block, int position, int key, String indexKey) { + DoubleBlock doubles = (DoubleBlock) block; + int offset = doubles.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(doubles.getDouble(offset + v), equalTo(key / 123_456d + v)); + } + if (key % 3 > 0) { + assertThat(doubles.mvOrdering(), equalTo(booleanAndNumericalDocValuesMvOrdering)); + } + } + + void mvStringsFromDocValues(Block block, int position, int key, String indexKey) { + mvStrings(block, position, key, bytesRefDocValuesMvOrdering); + } + + void mvStringsUnordered(Block block, int position, int key, String indexKey) { + mvStrings(block, position, key, Block.MvOrdering.UNORDERED); + } + + void mvStrings(Block block, int position, int key, Block.MvOrdering expectedMv) { + BytesRefBlock text = (BytesRefBlock) block; + assertThat(text.getValueCount(position), equalTo(key % 3 + 1)); + int offset = text.getFirstValueIndex(position); + for (int v = 0; v <= key % 3; v++) { + assertThat(text.getBytesRef(offset + v, new BytesRef()).utf8ToString(), equalTo(PREFIX[v] + key)); + } + if (key % 3 > 0) { + assertThat(text.mvOrdering(), equalTo(expectedMv)); + } + } + } + + static class StatusChecks { + + static void strFromDocValues(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues(name, "Ordinals", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void longsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("long", "Longs", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void longsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("source_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void intsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("int", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void intsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("source_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void shortsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("short", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void bytesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("byte", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void doublesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("double", "Doubles", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void keywordsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + docValues("kwd", "Ordinals", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void keywordsFromStored(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + stored("stored_kwd", "Bytes", forcedRowByRow, pageCount, 
segmentCount, readers); + } + + static void keywordsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("source_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvLongsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvLongsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("mv_source_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvIntsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvIntsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("mv_source_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvShortsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_short", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvBytesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_byte", "Ints", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvDoublesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_double", "Doubles", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvKeywordsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_kwd", "Ordinals", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvKeywordsFromStored(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + stored("mv_stored_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvKeywordsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + source("mv_source_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void unionFromDocValues(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + // TODO: develop a working check for this + // docValues(name, "Ordinals", forcedRowByRow, pageCount, segmentCount, readers); + } + + private static void docValues( + String name, + String type, + boolean forcedRowByRow, + int pageCount, + int segmentCount, + Map readers + ) { + if (forcedRowByRow) { + assertMap( + "Expected segment count in " + readers + "\n", + readers, + matchesMap().entry(name + ":row_stride:BlockDocValuesReader.Singleton" + type, lessThanOrEqualTo(segmentCount)) + ); + } else { + assertMap( + "Expected segment count in " + readers + "\n", + readers, + matchesMap().entry(name + ":column_at_a_time:BlockDocValuesReader.Singleton" + type, lessThanOrEqualTo(pageCount)) + ); + } + } + + private static void mvDocValues( + String name, + String type, + boolean forcedRowByRow, + int pageCount, + int segmentCount, + Map readers + ) { + if (forcedRowByRow) { + Integer singletons = (Integer) readers.remove(name + ":row_stride:BlockDocValuesReader.Singleton" + type); + if (singletons != null) { + segmentCount -= singletons; + } + assertMap(readers, matchesMap().entry(name + ":row_stride:BlockDocValuesReader." 
+ type, segmentCount)); + } else { + Integer singletons = (Integer) readers.remove(name + ":column_at_a_time:BlockDocValuesReader.Singleton" + type); + if (singletons != null) { + pageCount -= singletons; + } + assertMap( + readers, + matchesMap().entry(name + ":column_at_a_time:BlockDocValuesReader." + type, lessThanOrEqualTo(pageCount)) + ); + } + } + + static void id(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + stored("_id", "Id", forcedRowByRow, pageCount, segmentCount, readers); + } + + private static void source(String name, String type, boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + Matcher count; + if (forcedRowByRow) { + count = equalTo(segmentCount); + } else { + count = lessThanOrEqualTo(pageCount); + Integer columnAttempts = (Integer) readers.remove(name + ":column_at_a_time:null"); + assertThat(columnAttempts, not(nullValue())); + } + + Integer sequentialCount = (Integer) readers.remove("stored_fields[requires_source:true, fields:0, sequential: true]"); + Integer nonSequentialCount = (Integer) readers.remove("stored_fields[requires_source:true, fields:0, sequential: false]"); + int totalReaders = (sequentialCount == null ? 0 : sequentialCount) + (nonSequentialCount == null ? 0 : nonSequentialCount); + assertThat(totalReaders, count); + + assertMap(readers, matchesMap().entry(name + ":row_stride:BlockSourceReader." + type, count)); + } + + private static void stored(String name, String type, boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + Matcher count; + if (forcedRowByRow) { + count = equalTo(segmentCount); + } else { + count = lessThanOrEqualTo(pageCount); + Integer columnAttempts = (Integer) readers.remove(name + ":column_at_a_time:null"); + assertThat(columnAttempts, not(nullValue())); + } + + Integer sequentialCount = (Integer) readers.remove("stored_fields[requires_source:false, fields:1, sequential: true]"); + Integer nonSequentialCount = (Integer) readers.remove("stored_fields[requires_source:false, fields:1, sequential: false]"); + int totalReaders = (sequentialCount == null ? 0 : sequentialCount) + (nonSequentialCount == null ? 0 : nonSequentialCount); + assertThat(totalReaders, count); + + assertMap(readers, matchesMap().entry(name + ":row_stride:BlockStoredFieldsReader." 
+ type, count)); + } + + static void constantBytes(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap(readers, matchesMap().entry(name + ":row_stride:constant[[66 6f 6f]]", segmentCount)); + } else { + assertMap(readers, matchesMap().entry(name + ":column_at_a_time:constant[[66 6f 6f]]", lessThanOrEqualTo(pageCount))); + } + } + + static void constantNulls(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap(readers, matchesMap().entry(name + ":row_stride:constant_nulls", segmentCount)); + } else { + assertMap(readers, matchesMap().entry(name + ":column_at_a_time:constant_nulls", lessThanOrEqualTo(pageCount))); + } + } + } + + public void testWithNulls() throws IOException { + String indexKey = "index1"; + mapperServices.put(indexKey, new MapperServiceTestCase() { + }.createMapperService(MapperServiceTestCase.mapping(b -> { + fieldExamples(b, "i", "integer"); + fieldExamples(b, "j", "long"); + fieldExamples(b, "d", "double"); + }))); + MappedFieldType intFt = mapperService(indexKey).fieldType("i"); + MappedFieldType longFt = mapperService(indexKey).fieldType("j"); + MappedFieldType doubleFt = mapperService(indexKey).fieldType("d"); + MappedFieldType kwFt = new KeywordFieldMapper.KeywordFieldType("kw"); + + NumericDocValuesField intField = new NumericDocValuesField(intFt.name(), 0); + NumericDocValuesField longField = new NumericDocValuesField(longFt.name(), 0); + NumericDocValuesField doubleField = new DoubleDocValuesField(doubleFt.name(), 0); + final int numDocs = between(100, 5000); + try (RandomIndexWriter w = new RandomIndexWriter(random(), directory(indexKey))) { + Document doc = new Document(); + for (int i = 0; i < numDocs; i++) { + doc.clear(); + intField.setLongValue(i); + doc.add(intField); + if (i % 100 != 0) { // Do not set field for every 100 values + longField.setLongValue(i); + doc.add(longField); + doubleField.setDoubleValue(i); + doc.add(doubleField); + doc.add(new SortedDocValuesField(kwFt.name(), new BytesRef("kw=" + i))); + } + w.addDocument(doc); + } + w.commit(); + readers.put(indexKey, w.getReader()); + } + LuceneSourceOperatorTests.MockShardContext shardContext = new LuceneSourceOperatorTests.MockShardContext(reader(indexKey), 0); + DriverContext driverContext = driverContext(); + var luceneFactory = new LuceneSourceOperator.Factory( + List.of(shardContext), + ctx -> new MatchAllDocsQuery(), + randomFrom(DataPartitioning.values()), + randomIntBetween(1, 10), + randomPageSize(), + LuceneOperator.NO_LIMIT + ); + var vsShardContext = new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE); + try ( + Driver driver = new Driver( + driverContext, + luceneFactory.get(driverContext), + List.of( + factory(List.of(vsShardContext), intFt, ElementType.INT).get(driverContext), + factory(List.of(vsShardContext), longFt, ElementType.LONG).get(driverContext), + factory(List.of(vsShardContext), doubleFt, ElementType.DOUBLE).get(driverContext), + factory(List.of(vsShardContext), kwFt, ElementType.BYTES_REF).get(driverContext) + ), + new PageConsumerOperator(page -> { + try { + logger.debug("New page: {}", page); + IntBlock intValuesBlock = page.getBlock(1); + LongBlock longValuesBlock = page.getBlock(2); + DoubleBlock doubleValuesBlock = page.getBlock(3); + BytesRefBlock keywordValuesBlock = page.getBlock(4); + + for (int i = 0; i < page.getPositionCount(); i++) { + assertFalse(intValuesBlock.isNull(i)); 
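+ // Note (added for clarity): the int field is added to every document below, so it is never null and can serve as the document key; the other fields are omitted on every 100th document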
+ long j = intValuesBlock.getInt(i); + // Every 100 documents we set fields to null + boolean fieldIsEmpty = j % 100 == 0; + assertEquals(fieldIsEmpty, longValuesBlock.isNull(i)); + assertEquals(fieldIsEmpty, doubleValuesBlock.isNull(i)); + assertEquals(fieldIsEmpty, keywordValuesBlock.isNull(i)); + } + } finally { + page.releaseBlocks(); + } + }), + () -> {} + ) + ) { + runDriver(driver); + } + assertDriverContext(driverContext); + } + + private XContentBuilder fieldExamples(XContentBuilder builder, String name, String type) throws IOException { + simpleField(builder, name, type); + simpleField(builder, "str_" + name, "keyword"); + simpleField(builder, "mv_" + name, type); + simpleField(builder, "missing_" + name, type); + sourceField(builder, "source_" + name, type); + return sourceField(builder, "mv_source_" + name, type); + } + + private XContentBuilder simpleField(XContentBuilder builder, String name, String type) throws IOException { + return builder.startObject(name).field("type", type).endObject(); + } + + private XContentBuilder sourceField(XContentBuilder builder, String name, String type) throws IOException { + return builder.startObject(name).field("type", type).field("store", false).field("doc_values", false).endObject(); + } + + private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) { + FieldType ft = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE); + ft.setDocValuesType(DocValuesType.NONE); + ft.setStored(true); + ft.freeze(); + return new KeywordFieldMapper.KeywordFieldType( + name, + ft, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + Lucene.KEYWORD_ANALYZER, + new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false), + true // TODO randomize - load from stored keyword fields if stored even in synthetic source + ); + } + + @AwaitsFix(bugUrl = "Get working for multiple indices") + public void testNullsShared() { + DriverContext driverContext = driverContext(); + List shardContexts = initShardContexts(); + int[] pages = new int[] { 0 }; + try ( + Driver d = new Driver( + driverContext, + simpleInput(driverContext, 10), + List.of( + new ValuesSourceReaderOperator.Factory( + List.of( + new ValuesSourceReaderOperator.FieldInfo("null1", ElementType.NULL, shardIdx -> BlockLoader.CONSTANT_NULLS), + new ValuesSourceReaderOperator.FieldInfo("null2", ElementType.NULL, shardIdx -> BlockLoader.CONSTANT_NULLS) + ), + shardContexts, + 0 + ).get(driverContext) + ), + new PageConsumerOperator(page -> { + try { + assertThat(page.getBlockCount(), equalTo(3)); + assertThat(page.getBlock(1).areAllValuesNull(), equalTo(true)); + assertThat(page.getBlock(2).areAllValuesNull(), equalTo(true)); + assertThat(page.getBlock(1), sameInstance(page.getBlock(2))); + pages[0]++; + } finally { + page.releaseBlocks(); + } + }), + () -> {} + ) + ) { + runDriver(d); + } + assertThat(pages[0], greaterThan(0)); + assertDriverContext(driverContext); + } + + public void testDescriptionOfMany() throws IOException { + String indexKey = "index1"; + initIndex(indexKey, 1, 1); + Block.MvOrdering ordering = randomFrom(Block.MvOrdering.values()); + List cases = infoAndChecksForEachType(ordering, ordering); + + ValuesSourceReaderOperator.Factory factory = new ValuesSourceReaderOperator.Factory( + cases.stream().map(c -> c.info).toList(), + List.of(new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE)), + 0 + ); + assertThat(factory.describe(), equalTo("ValuesSourceReaderOperator[fields = [" + cases.size() + " 
fields]]")); + try (Operator op = factory.get(driverContext())) { + assertThat(op.toString(), equalTo("ValuesSourceReaderOperator[fields = [" + cases.size() + " fields]]")); + } + } + + public void testManyShards() throws IOException { + String indexKey = "index1"; + initMapping(indexKey); + int shardCount = between(2, 10); + int size = between(100, 1000); + Directory[] dirs = new Directory[shardCount]; + IndexReader[] readers = new IndexReader[shardCount]; + Closeable[] closeMe = new Closeable[shardCount * 2]; + Set seenShards = new TreeSet<>(); + Map keyCounts = new TreeMap<>(); + try { + for (int d = 0; d < dirs.length; d++) { + closeMe[d * 2 + 1] = dirs[d] = newDirectory(); + closeMe[d * 2] = readers[d] = initIndex(indexKey, dirs[d], size, between(10, size * 2)); + } + List contexts = new ArrayList<>(); + List readerShardContexts = new ArrayList<>(); + for (int s = 0; s < shardCount; s++) { + contexts.add(new LuceneSourceOperatorTests.MockShardContext(readers[s], s)); + readerShardContexts.add(new ValuesSourceReaderOperator.ShardContext(readers[s], () -> SourceLoader.FROM_STORED_SOURCE)); + } + var luceneFactory = new LuceneSourceOperator.Factory( + contexts, + ctx -> new MatchAllDocsQuery(), + DataPartitioning.SHARD, + randomIntBetween(1, 10), + 1000, + LuceneOperator.NO_LIMIT + ); + // TODO add index2 + MappedFieldType ft = mapperService(indexKey).fieldType("key"); + var readerFactory = new ValuesSourceReaderOperator.Factory( + List.of(new ValuesSourceReaderOperator.FieldInfo("key", ElementType.INT, shardIdx -> { + seenShards.add(shardIdx); + return ft.blockLoader(blContext()); + })), + readerShardContexts, + 0 + ); + DriverContext driverContext = driverContext(); + List results = drive( + readerFactory.get(driverContext), + CannedSourceOperator.collectPages(luceneFactory.get(driverContext)).iterator(), + driverContext + ); + assertThat(seenShards, equalTo(IntStream.range(0, shardCount).boxed().collect(Collectors.toCollection(TreeSet::new)))); + for (Page p : results) { + IntBlock keyBlock = p.getBlock(1); + IntVector keys = keyBlock.asVector(); + for (int i = 0; i < keys.getPositionCount(); i++) { + keyCounts.merge(keys.getInt(i), 1, Integer::sum); + } + } + assertThat(keyCounts.keySet(), hasSize(size)); + for (int k = 0; k < size; k++) { + assertThat(keyCounts.get(k), equalTo(shardCount)); + } + } finally { + IOUtils.close(closeMe); + } + } + + protected final List drive(Operator operator, Iterator input, DriverContext driverContext) { + return drive(List.of(operator), input, driverContext); + } + + protected final List drive(List operators, Iterator input, DriverContext driverContext) { + List results = new ArrayList<>(); + boolean success = false; + try ( + Driver d = new Driver( + driverContext, + new CannedSourceOperator(input), + operators, + new TestResultPageSinkOperator(results::add), + () -> {} + ) + ) { + runDriver(d); + success = true; + } finally { + if (success == false) { + Releasables.closeExpectNoException(Releasables.wrap(() -> Iterators.map(results.iterator(), p -> p::releaseBlocks))); + } + } + return results; + } + + public static void runDriver(Driver driver) { + runDriver(List.of(driver)); + } + + public static void runDriver(List drivers) { + drivers = new ArrayList<>(drivers); + int dummyDrivers = between(0, 10); + for (int i = 0; i < dummyDrivers; i++) { + drivers.add( + new Driver( + "dummy-session", + 0, + 0, + new DriverContext(BigArrays.NON_RECYCLING_INSTANCE, TestBlockFactory.getNonBreakingInstance()), + () -> "dummy-driver", + new 
SequenceLongBlockSourceOperator( + TestBlockFactory.getNonBreakingInstance(), + LongStream.range(0, between(1, 100)), + between(1, 100) + ), + List.of(), + new PageConsumerOperator(Page::releaseBlocks), + Driver.DEFAULT_STATUS_INTERVAL, + () -> {} + ) + ); + } + Randomness.shuffle(drivers); + int numThreads = between(1, 16); + ThreadPool threadPool = new TestThreadPool( + getTestClass().getSimpleName(), + new FixedExecutorBuilder(Settings.EMPTY, "esql", numThreads, 1024, "esql", EsExecutors.TaskTrackingConfig.DEFAULT) + ); + var driverRunner = new DriverRunner(threadPool.getThreadContext()) { + @Override + protected void start(Driver driver, ActionListener<Void> driverListener) { + Driver.start(threadPool.getThreadContext(), threadPool.executor("esql"), driver, between(1, 10000), driverListener); + } + }; + PlainActionFuture<Void> future = new PlainActionFuture<>(); + try { + driverRunner.runToCompletion(drivers, future); + future.actionGet(TimeValue.timeValueSeconds(30)); + } finally { + terminate(threadPool); + } + } + + public static void assertDriverContext(DriverContext driverContext) { + assertTrue(driverContext.isFinished()); + assertThat(driverContext.getSnapshot().releasables(), empty()); + } + + public static int randomPageSize() { + if (randomBoolean()) { + return between(1, 16); + } else { + return between(1, 16 * 1024); + } + } + + /** + * This method produces the same converter for all shards, which makes it useful for general type-converting tests, + * but not for union-types tests specifically, which require a different converter for each shard. + */ + private static BlockLoader getBlockLoaderFor(int shardIdx, MappedFieldType ft, MappedFieldType ftX) { + if (shardIdx < 0 || shardIdx >= INDICES.size()) { + fail("unexpected shardIdx [" + shardIdx + "]"); + } + BlockLoader blockLoader = ft.blockLoader(blContext()); + if (ftX != null && ftX.typeName().equals(ft.typeName()) == false) { + blockLoader = new TestTypeConvertingBlockLoader(blockLoader, ft.typeName(), ftX.typeName()); + } else { + TestIndexMappingConfig mappingConfig = INDICES.get("index" + (shardIdx + 1)); + TestFieldType testFieldType = mappingConfig.fieldTypes.get(ft.name()); + if (testFieldType != null) { + blockLoader = new TestTypeConvertingBlockLoader(blockLoader, testFieldType.dataType.typeName(), "keyword"); + } + } + return blockLoader; + } + + /** + * This method is used to generate shard-specific field information, so we can have different types and BlockLoaders for each shard. + */ + private BlockLoader getBlockLoaderFor(int shardIdx, String fieldName, DataType toType) { + if (shardIdx < 0 || shardIdx >= INDICES.size()) { + fail("unexpected shardIdx [" + shardIdx + "]"); + } + String indexKey = "index" + (shardIdx + 1); + TestIndexMappingConfig mappingConfig = INDICES.get(indexKey); + TestFieldType testFieldType = mappingConfig.fieldTypes.get(fieldName); + if (testFieldType == null) { + throw new IllegalArgumentException("Unknown test field: " + fieldName); + } + MapperService mapper = mapperService(indexKey); + MappedFieldType ft = mapper.fieldType(fieldName); + BlockLoader blockLoader = ft.blockLoader(blContext()); + blockLoader = new TestTypeConvertingBlockLoader(blockLoader, testFieldType.dataType.typeName(), toType.typeName()); + return blockLoader; + } + + /** + * The implementation of union types relies on BlockLoader.convert(Block) to convert the block to the correct type + * at the point it is read from source, so that the rest of the query only deals with a single type for that field.
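+ * For example, when one index maps client_ip as KEYWORD and another as IP, the BlockLoader for the KEYWORD shard is wrapped + * so that its blocks are re-encoded as IP at load time, as exercised by the union_types csv-spec tests.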
+ * This is implemented in the 'esql' module, and so we have a mock for this behaviour here, which is a simplified subset of the + * features in the real implementation. + */ + static class TestTypeConvertingBlockLoader implements BlockLoader { + protected final BlockLoader delegate; + private final EvalOperator.ExpressionEvaluator convertEvaluator; + + protected TestTypeConvertingBlockLoader(BlockLoader delegate, String fromTypeName, String toTypeName) { + this.delegate = delegate; + DriverContext driverContext = new DriverContext( + BigArrays.NON_RECYCLING_INSTANCE, + new org.elasticsearch.compute.data.BlockFactory( + new NoopCircuitBreaker(CircuitBreaker.REQUEST), + BigArrays.NON_RECYCLING_INSTANCE + ) + ); + TestBlockConverter blockConverter = TestDataTypeConverters.blockConverter(driverContext, fromTypeName, toTypeName); + this.convertEvaluator = new EvalOperator.ExpressionEvaluator() { + @Override + public org.elasticsearch.compute.data.Block eval(Page page) { + org.elasticsearch.compute.data.Block block = page.getBlock(0); + return blockConverter.convert(block); + } + + @Override + public void close() {} + }; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + // Return the delegate's builder, which builds the original mapped type, before conversion + return delegate.builder(factory, expectedCount); + } + + @Override + public Block convert(Block block) { + Page page = new Page((org.elasticsearch.compute.data.Block) block); + return convertEvaluator.eval(page); + } + + @Override + public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException { + ColumnAtATimeReader reader = delegate.columnAtATimeReader(context); + if (reader == null) { + return null; + } + return new ColumnAtATimeReader() { + @Override + public Block read(BlockFactory factory, Docs docs) throws IOException { + Block block = reader.read(factory, docs); + Page page = new Page((org.elasticsearch.compute.data.Block) block); + return convertEvaluator.eval(page); + } + + @Override + public boolean canReuse(int startingDocID) { + return reader.canReuse(startingDocID); + } + + @Override + public String toString() { + return reader.toString(); + } + }; + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { + // We do no type conversion here, since that will be done in the ValuesSourceReaderOperator for row-stride cases, + // using the BlockLoader.convert(Block) method defined above + return delegate.rowStrideReader(context); + } + + @Override + public StoredFieldsSpec rowStrideStoredFieldSpec() { + return delegate.rowStrideStoredFieldSpec(); + } + + @Override + public boolean supportsOrdinals() { + return delegate.supportsOrdinals(); + } + + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { + return delegate.ordinals(context); + } + + @Override + public final String toString() { + return "TypeConvertingBlockLoader[delegate=" + delegate + "]"; + } + } + + @FunctionalInterface + private interface TestBlockConverter { + Block convert(Block block); + } + + /** + * Blocks that should be converted from some type to a string (keyword) can use this converter.
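+ * Note that conversion preserves block structure: positions with no values are appended as null, and multi-value positions + * are wrapped in beginPositionEntry()/endPositionEntry() calls.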
+ */ + private abstract static class BlockToStringConverter implements TestBlockConverter { + private final DriverContext driverContext; + + BlockToStringConverter(DriverContext driverContext) { + this.driverContext = driverContext; + } + + @Override + public Block convert(Block block) { + int positionCount = block.getPositionCount(); + try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) { + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + BytesRef value = evalValue(block, i); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + builder.appendBytesRef(value); + valuesAppended = true; + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); + } + } + return builder.build(); + } finally { + block.close(); + } + } + + abstract BytesRef evalValue(Block container, int index); + } + + /** + * Blocks that should be converted from a string (keyword) to some other type can use this converter. + */ + private abstract static class TestBlockFromStringConverter<T> implements TestBlockConverter { + protected final DriverContext driverContext; + + TestBlockFromStringConverter(DriverContext driverContext) { + this.driverContext = driverContext; + } + + @Override + public Block convert(Block b) { + BytesRefBlock block = (BytesRefBlock) b; + int positionCount = block.getPositionCount(); + try (Block.Builder builder = blockBuilder(positionCount)) { + BytesRef scratchPad = new BytesRef(); + for (int p = 0; p < positionCount; p++) { + int valueCount = block.getValueCount(p); + int start = block.getFirstValueIndex(p); + int end = start + valueCount; + boolean positionOpened = false; + boolean valuesAppended = false; + for (int i = start; i < end; i++) { + T value = evalValue(block, i, scratchPad); + if (positionOpened == false && valueCount > 1) { + builder.beginPositionEntry(); + positionOpened = true; + } + appendValue(builder, value); + valuesAppended = true; + } + if (valuesAppended == false) { + builder.appendNull(); + } else if (positionOpened) { + builder.endPositionEntry(); + } + } + return builder.build(); + } finally { + b.close(); + } + } + + abstract Block.Builder blockBuilder(int expectedCount); + + abstract void appendValue(Block.Builder builder, T value); + + abstract T evalValue(BytesRefBlock container, int index, BytesRef scratchPad); + } + + private static class TestLongBlockToStringConverter extends BlockToStringConverter { + TestLongBlockToStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + BytesRef evalValue(Block container, int index) { + return new BytesRef(Long.toString(((LongBlock) container).getLong(index))); + } + } + + private static class TestLongBlockFromStringConverter extends TestBlockFromStringConverter<Long> { + TestLongBlockFromStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + Block.Builder blockBuilder(int expectedCount) { + return driverContext.blockFactory().newLongBlockBuilder(expectedCount); + } + + @Override + Long evalValue(BytesRefBlock container, int index, BytesRef scratchPad) { + return StringUtils.parseLong(container.getBytesRef(index, scratchPad).utf8ToString()); + } + + @Override + void
appendValue(Block.Builder builder, Long value) { + ((LongBlock.Builder) builder).appendLong(value); + } + } + + private static class TestIntegerBlockToStringConverter extends BlockToStringConverter { + TestIntegerBlockToStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + BytesRef evalValue(Block container, int index) { + return new BytesRef(Integer.toString(((IntBlock) container).getInt(index))); + } + } + + private static class TestIntegerBlockFromStringConverter extends TestBlockFromStringConverter<Integer> { + TestIntegerBlockFromStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + Block.Builder blockBuilder(int expectedCount) { + return driverContext.blockFactory().newIntBlockBuilder(expectedCount); + } + + @Override + Integer evalValue(BytesRefBlock container, int index, BytesRef scratchPad) { + return (int) StringUtils.parseLong(container.getBytesRef(index, scratchPad).utf8ToString()); + } + + @Override + void appendValue(Block.Builder builder, Integer value) { + ((IntBlock.Builder) builder).appendInt(value); + } + } + + private static class TestBooleanBlockToStringConverter extends BlockToStringConverter { + + TestBooleanBlockToStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + BytesRef evalValue(Block container, int index) { + return ((BooleanBlock) container).getBoolean(index) ? new BytesRef("true") : new BytesRef("false"); + } + } + + private static class TestBooleanBlockFromStringConverter extends TestBlockFromStringConverter<Boolean> { + + TestBooleanBlockFromStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + Block.Builder blockBuilder(int expectedCount) { + return driverContext.blockFactory().newBooleanBlockBuilder(expectedCount); + } + + @Override + void appendValue(Block.Builder builder, Boolean value) { + ((BooleanBlock.Builder) builder).appendBoolean(value); + } + + @Override + Boolean evalValue(BytesRefBlock container, int index, BytesRef scratchPad) { + return Boolean.parseBoolean(container.getBytesRef(index, scratchPad).utf8ToString()); + } + } + + private static class TestDoubleBlockToStringConverter extends BlockToStringConverter { + + TestDoubleBlockToStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + BytesRef evalValue(Block container, int index) { + return new BytesRef(Double.toString(((DoubleBlock) container).getDouble(index))); + } + } + + private static class TestDoubleBlockFromStringConverter extends TestBlockFromStringConverter<Double> { + + TestDoubleBlockFromStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + Block.Builder blockBuilder(int expectedCount) { + return driverContext.blockFactory().newDoubleBlockBuilder(expectedCount); + } + + @Override + void appendValue(Block.Builder builder, Double value) { + ((DoubleBlock.Builder) builder).appendDouble(value); + } + + @Override + Double evalValue(BytesRefBlock container, int index, BytesRef scratchPad) { + return Double.parseDouble(container.getBytesRef(index, scratchPad).utf8ToString()); + } + } + + /** + * Many types are backed by BytesRef blocks, but encode their contents in different ways. + * For example, the IP type encodes both IPv4 and IPv6 addresses as 16-byte IPv6 binary arrays. + * The KEYWORD type, by contrast, encodes its values as UTF-8 strings, which are typically much shorter for IP data; + * for example, "192.168.0.1" is 11 bytes.
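+ * Thus TestStringToIPConverter re-encodes a keyword such as "192.168.0.1" as its 16-byte binary form using StringUtils.parseIP, + * while TestIPToStringConverter uses DocValueFormat.IP to render the binary form back to the original string.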
+ * Converting blocks between these types involves converting the BytesRef block to the specific internal type, + * and then back to a BytesRef block with the other encoding. + */ + private abstract static class TestBytesRefToBytesRefConverter extends BlockToStringConverter { + + BytesRef scratchPad = new BytesRef(); + + TestBytesRefToBytesRefConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + BytesRef evalValue(Block container, int index) { + return convertByteRef(((BytesRefBlock) container).getBytesRef(index, scratchPad)); + } + + abstract BytesRef convertByteRef(BytesRef bytesRef); + } + + private static class TestIPToStringConverter extends TestBytesRefToBytesRefConverter { + + TestIPToStringConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + BytesRef convertByteRef(BytesRef bytesRef) { + return new BytesRef(DocValueFormat.IP.format(bytesRef)); + } + } + + private static class TestStringToIPConverter extends TestBytesRefToBytesRefConverter { + + TestStringToIPConverter(DriverContext driverContext) { + super(driverContext); + } + + @Override + BytesRef convertByteRef(BytesRef bytesRef) { + return StringUtils.parseIP(bytesRef.utf8ToString()); + } + } + + /** + * Utility class for creating type-specific converters based on their typeName values. + * We do not support all possible combinations, only those needed by the tests. + * In particular, either the 'from' or the 'to' type must be KEYWORD. + */ + private static class TestDataTypeConverters { + public static TestBlockConverter blockConverter(DriverContext driverContext, String fromTypeName, String toTypeName) { + if (toTypeName == null || fromTypeName.equals(toTypeName)) { + return b -> b; + } + if (isString(fromTypeName)) { + return switch (toTypeName) { + case "boolean" -> new TestBooleanBlockFromStringConverter(driverContext); + case "short", "integer" -> new TestIntegerBlockFromStringConverter(driverContext); + case "long" -> new TestLongBlockFromStringConverter(driverContext); + case "double", "float" -> new TestDoubleBlockFromStringConverter(driverContext); + case "ip" -> new TestStringToIPConverter(driverContext); + default -> throw new UnsupportedOperationException("Conversion from string to " + toTypeName + " is not supported"); + }; + } + if (isString(toTypeName)) { + return switch (fromTypeName) { + case "boolean" -> new TestBooleanBlockToStringConverter(driverContext); + case "short", "integer" -> new TestIntegerBlockToStringConverter(driverContext); + case "long" -> new TestLongBlockToStringConverter(driverContext); + case "double", "float" -> new TestDoubleBlockToStringConverter(driverContext); + case "ip" -> new TestIPToStringConverter(driverContext); + default -> throw new UnsupportedOperationException("Conversion from " + fromTypeName + " to string is not supported"); + }; + } + throw new UnsupportedOperationException("Conversion from " + fromTypeName + " to " + toTypeName + " is not supported"); + } + + private static boolean isString(String typeName) { + return typeName.equals("keyword") || typeName.equals("text"); + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java index af3af033efd4c..875058ba6e0e4 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java +++
b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java @@ -41,7 +41,6 @@ import static org.hamcrest.Matchers.instanceOf; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; -import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public final class CsvAssert { @@ -110,6 +109,9 @@ private static void assertMetadata( if (actualType == Type.INTEGER && expectedType == Type.LONG) { actualType = Type.LONG; } + if (actualType == null) { + actualType = Type.NULL; + } assertEquals( "Different column type for column [" + expectedName + "] (" + expectedType + " != " + actualType + ")", @@ -188,7 +190,13 @@ public static void assertData( for (int row = 0; row < expectedValues.size(); row++) { try { - assertTrue("Expected more data but no more entries found after [" + row + "]", row < actualValues.size()); + if (row >= actualValues.size()) { + if (dataFailures.isEmpty()) { + fail("Expected more data but no more entries found after [" + row + "]"); + } else { + dataFailure(dataFailures, "Expected more data but no more entries found after [" + row + "]\n"); + } + } if (logger != null) { logger.info(row(actualValues, row)); @@ -257,7 +265,11 @@ public static void assertData( } private static void dataFailure(List dataFailures) { - fail("Data mismatch:\n" + dataFailures.stream().map(f -> { + dataFailure(dataFailures, ""); + } + + private static void dataFailure(List dataFailures, String prefixError) { + fail(prefixError + "Data mismatch:\n" + dataFailures.stream().map(f -> { Description description = new StringDescription(); ListMatcher expected; if (f.expected instanceof List e) { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 1c1ec3194fef5..ec5770e8ce70b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -57,6 +57,16 @@ public class CsvTestsDataLoader { private static final TestsDataset LANGUAGES = new TestsDataset("languages", "mapping-languages.json", "languages.csv"); private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs", "mapping-ul_logs.json", "ul_logs.csv"); private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data", "mapping-sample_data.json", "sample_data.csv"); + private static final TestsDataset SAMPLE_DATA_STR = new TestsDataset( + "sample_data_str", + "mapping-sample_data_str.json", + "sample_data_str.csv" + ); + private static final TestsDataset SAMPLE_DATA_TS_LONG = new TestsDataset( + "sample_data_ts_long", + "mapping-sample_data_ts_long.json", + "sample_data_ts_long.csv" + ); private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr", "mapping-client_cidr.json", "client_cidr.csv"); private static final TestsDataset AGES = new TestsDataset("ages", "mapping-ages.json", "ages.csv"); @@ -95,6 +105,8 @@ public class CsvTestsDataLoader { Map.entry(LANGUAGES.indexName, LANGUAGES), Map.entry(UL_LOGS.indexName, UL_LOGS), Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA), + Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), + Map.entry(SAMPLE_DATA_TS_LONG.indexName, 
SAMPLE_DATA_TS_LONG), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json new file mode 100644 index 0000000000000..9e97de8c92928 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json @@ -0,0 +1,16 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "client_ip": { + "type": "keyword" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json new file mode 100644 index 0000000000000..ecf21a2a919d0 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json @@ -0,0 +1,16 @@ +{ + "properties": { + "@timestamp": { + "type": "long" + }, + "client_ip": { + "type": "ip" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv new file mode 100644 index 0000000000000..bc98671adc7ff --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv @@ -0,0 +1,8 @@ +@timestamp:date,client_ip:keyword,event_duration:long,message:keyword +2023-10-23T13:55:01.543Z,172.21.3.15,1756467,Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z,172.21.3.15,5033755,Connection error +2023-10-23T13:52:55.015Z,172.21.3.15,8268153,Connection error +2023-10-23T13:51:54.732Z,172.21.3.15,725448,Connection error +2023-10-23T13:33:34.937Z,172.21.0.5,1232382,Disconnected +2023-10-23T12:27:28.948Z,172.21.2.113,2764889,Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z,172.21.2.162,3450233,Connected to 10.1.0.3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_ts_long.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_ts_long.csv new file mode 100644 index 0000000000000..2a6add2ea624d --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_ts_long.csv @@ -0,0 +1,8 @@ +@timestamp:long,client_ip:ip,event_duration:long,message:keyword +1698069301543,172.21.3.15,1756467,Connected to 10.1.0.1 +1698069235832,172.21.3.15,5033755,Connection error +1698069175015,172.21.3.15,8268153,Connection error +1698069114732,172.21.3.15,725448,Connection error +1698068014937,172.21.0.5,1232382,Disconnected +1698064048948,172.21.2.113,2764889,Connected to 10.1.0.2 +1698063303360,172.21.2.162,3450233,Connected to 10.1.0.3 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec new file mode 100644 index 0000000000000..ee8c4be385e0f --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -0,0 +1,719 @@ +singleIndexIp +FROM sample_data +| EVAL client_ip = TO_IP(client_ip) +| KEEP @timestamp, client_ip, event_duration, message +| SORT @timestamp DESC +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z | 
172.21.3.15 | 5033755 | Connection error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +singleIndexWhereIpLike +FROM sample_data +| WHERE TO_STRING(client_ip) LIKE "172.21.2.*" +| KEEP @timestamp, event_duration, message +| SORT @timestamp DESC +; + +@timestamp:date | event_duration:long | message:keyword +2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 +; + +singleIndexTsLong +FROM sample_data_ts_long +| EVAL @timestamp = TO_DATETIME(@timestamp) +| KEEP @timestamp, client_ip, event_duration, message +| SORT @timestamp DESC +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +singleIndexIpStats +FROM sample_data +| EVAL client_ip = TO_IP(client_ip) +| STATS count=count(*) BY client_ip +| SORT count DESC, client_ip ASC +| KEEP count, client_ip +; + +count:long | client_ip:ip +4 | 172.21.3.15 +1 | 172.21.0.5 +1 | 172.21.2.113 +1 | 172.21.2.162 +; + +singleIndexIpStringStats +FROM sample_data_str +| EVAL client_ip = TO_IP(client_ip) +| STATS count=count(*) BY client_ip +| SORT count DESC, client_ip ASC +| KEEP count, client_ip +; + +count:long | client_ip:ip +4 | 172.21.3.15 +1 | 172.21.0.5 +1 | 172.21.2.113 +1 | 172.21.2.162 +; + +multiIndexIpString +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_str METADATA _index +| EVAL client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 
+sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexIpStringRename +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_str METADATA _index +| EVAL host_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, host_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | host_ip:ip | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexIpStringRenameToString +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_str METADATA _index +| EVAL host_ip = TO_STRING(TO_IP(client_ip)) +| KEEP _index, @timestamp, host_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | host_ip:keyword | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexWhereIpString +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_str METADATA _index +| WHERE STARTS_WITH(TO_STRING(client_ip), "172.21.2") +| KEEP _index, @timestamp, event_duration, 
message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | event_duration:long | message:keyword +sample_data | 2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 +; + +multiIndexWhereIpStringLike +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_str METADATA _index +| WHERE TO_STRING(client_ip) LIKE "172.21.2.*" +| KEEP _index, @timestamp, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | event_duration:long | message:keyword +sample_data | 2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 +; + +multiIndexIpStringStats +required_capability: union_types + +FROM sample_data, sample_data_str +| EVAL client_ip = TO_IP(client_ip) +| STATS count=count(*) BY client_ip +| SORT count DESC, client_ip ASC +| KEEP count, client_ip +; + +count:long | client_ip:ip +8 | 172.21.3.15 +2 | 172.21.0.5 +2 | 172.21.2.113 +2 | 172.21.2.162 +; + +multiIndexIpStringRenameStats +required_capability: union_types + +FROM sample_data, sample_data_str +| EVAL host_ip = TO_IP(client_ip) +| STATS count=count(*) BY host_ip +| SORT count DESC, host_ip ASC +| KEEP count, host_ip +; + +count:long | host_ip:ip +8 | 172.21.3.15 +2 | 172.21.0.5 +2 | 172.21.2.113 +2 | 172.21.2.162 +; + +multiIndexIpStringRenameToStringStats +required_capability: union_types + +FROM sample_data, sample_data_str +| EVAL host_ip = TO_STRING(TO_IP(client_ip)) +| STATS count=count(*) BY host_ip +| SORT count DESC, host_ip ASC +| KEEP count, host_ip +; + +count:long | host_ip:keyword +8 | 172.21.3.15 +2 | 172.21.0.5 +2 | 172.21.2.113 +2 | 172.21.2.162 +; + +multiIndexIpStringStatsInline +required_capability: union_types +required_capability: union_types_inline_fix + +FROM sample_data, sample_data_str +| STATS count=count(*) BY client_ip = TO_IP(client_ip) +| SORT count DESC, client_ip ASC +| KEEP count, client_ip +; + +count:long | client_ip:ip +8 | 172.21.3.15 +2 | 172.21.0.5 +2 | 172.21.2.113 +2 | 172.21.2.162 +; + +multiIndexWhereIpStringStats +required_capability: union_types + +FROM sample_data, sample_data_str +| WHERE STARTS_WITH(TO_STRING(client_ip), "172.21.2") +| STATS count=count(*) BY message +| SORT count DESC, message ASC +| KEEP count, message +; + +count:long | message:keyword +2 | Connected to 10.1.0.2 +2 | Connected to 10.1.0.3 +; + +multiIndexTsLong +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_ts_long METADATA _index +| EVAL @timestamp = TO_DATETIME(@timestamp) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection 
error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexTsLongRename +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_ts_long METADATA _index +| EVAL ts = TO_DATETIME(@timestamp) +| KEEP _index, ts, client_ip, event_duration, message +| SORT _index ASC, ts DESC +; + +_index:keyword | ts:date | client_ip:ip | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexTsLongRenameToString +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_ts_long METADATA _index +| EVAL ts = TO_STRING(TO_DATETIME(@timestamp)) +| KEEP _index, ts, client_ip, event_duration, message +| SORT _index ASC, ts DESC +; + +_index:keyword | ts:keyword | client_ip:ip | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 
| 1756467 | Connected to 10.1.0.1 +sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexWhereTsLong +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data, sample_data_ts_long METADATA _index +| WHERE TO_LONG(@timestamp) < 1698068014937 +| KEEP _index, client_ip, event_duration, message +| SORT _index ASC, client_ip ASC +; + +_index:keyword | client_ip:ip | event_duration:long | message:keyword +sample_data | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexTsLongStats +required_capability: union_types + +FROM sample_data, sample_data_ts_long +| EVAL @timestamp = DATE_TRUNC(1 hour, TO_DATETIME(@timestamp)) +| STATS count=count(*) BY @timestamp +| SORT count DESC, @timestamp ASC +| KEEP count, @timestamp +; + +count:long | @timestamp:date +10 | 2023-10-23T13:00:00.000Z +4 | 2023-10-23T12:00:00.000Z +; + +multiIndexTsLongRenameStats +required_capability: union_types + +FROM sample_data, sample_data_ts_long +| EVAL hour = DATE_TRUNC(1 hour, TO_DATETIME(@timestamp)) +| STATS count=count(*) BY hour +| SORT count DESC, hour ASC +| KEEP count, hour +; + +count:long | hour:date +10 | 2023-10-23T13:00:00.000Z +4 | 2023-10-23T12:00:00.000Z +; + +multiIndexTsLongRenameToDatetimeToStringStats +required_capability: union_types + +FROM sample_data, sample_data_ts_long +| EVAL hour = LEFT(TO_STRING(TO_DATETIME(@timestamp)), 13) +| STATS count=count(*) BY hour +| SORT count DESC, hour ASC +| KEEP count, hour +; + +count:long | hour:keyword +10 | 2023-10-23T13 +4 | 2023-10-23T12 +; + +multiIndexTsLongRenameToStringStats +required_capability: union_types + +FROM sample_data, sample_data_ts_long +| EVAL mess = LEFT(TO_STRING(@timestamp), 7) +| STATS count=count(*) BY mess +| SORT count DESC, mess DESC +| KEEP count, mess +; + +count:long | mess:keyword +7 | 2023-10 +4 | 1698069 +1 | 1698068 +1 | 1698064 +1 | 1698063 +; + +multiIndexTsLongStatsInline +required_capability: union_types + +FROM sample_data, sample_data_ts_long +| STATS count=COUNT(*), max=MAX(TO_DATETIME(@timestamp)) +| KEEP count, max +; + +count:long | max:date +14 | 2023-10-23T13:55:01.543Z +; + +multiIndexTsLongStatsInlineDropped +required_capability: union_types + +FROM sample_data, sample_data_ts_long +| STATS count=COUNT(*), max=MAX(TO_DATETIME(@timestamp)) +| KEEP count +; + +count:long +14 +; + +multiIndexWhereTsLongStats +required_capability: union_types + +FROM sample_data, sample_data_ts_long +| WHERE TO_LONG(@timestamp) < 1698068014937 +| STATS count=count(*) BY message +| SORT count DESC, message ASC +| KEEP count, message +; + +count:long | message:keyword +2 | Connected to 10.1.0.2 +2 | Connected to 10.1.0.3 +; + +multiIndexIpStringTsLong +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data* METADATA _index +| EVAL @timestamp = 
TO_DATETIME(@timestamp), client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT _index ASC, @timestamp DESC +; + +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexIpStringTsLongDropped +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data* METADATA _index +| EVAL @timestamp = TO_DATETIME(@timestamp), client_ip = TO_IP(client_ip) +| KEEP _index, event_duration, message +| SORT _index ASC, event_duration ASC +; + +_index:keyword | event_duration:long | message:keyword +sample_data | 725448 | Connection error +sample_data | 1232382 | Disconnected +sample_data | 1756467 | Connected to 10.1.0.1 +sample_data | 2764889 | Connected to 10.1.0.2 +sample_data | 3450233 | Connected to 10.1.0.3 +sample_data | 5033755 | Connection error +sample_data | 8268153 | Connection error +sample_data_str | 725448 | Connection error +sample_data_str | 1232382 | Disconnected +sample_data_str | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2764889 | Connected to 10.1.0.2 +sample_data_str | 3450233 | Connected to 10.1.0.3 +sample_data_str | 5033755 | Connection error +sample_data_str | 8268153 | Connection error +sample_data_ts_long | 725448 | Connection error +sample_data_ts_long | 1232382 | Disconnected +sample_data_ts_long | 1756467 | Connected to 10.1.0.1 +sample_data_ts_long | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 5033755 | Connection error +sample_data_ts_long | 8268153 | Connection error +; + +multiIndexIpStringTsLongRename +required_capability: union_types 
+required_capability: metadata_fields + +FROM sample_data* METADATA _index +| EVAL ts = TO_DATETIME(@timestamp), host_ip = TO_IP(client_ip) +| KEEP _index, ts, host_ip, event_duration, message +| SORT _index ASC, ts DESC +; + +_index:keyword | ts:date | host_ip:ip | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexIpStringTsLongRenameDropped +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data* METADATA _index +| EVAL ts = TO_DATETIME(@timestamp), host_ip = TO_IP(client_ip) +| KEEP _index, event_duration, message +| SORT _index ASC, event_duration ASC +; + +_index:keyword | event_duration:long | message:keyword +sample_data | 725448 | Connection error +sample_data | 1232382 | Disconnected +sample_data | 1756467 | Connected to 10.1.0.1 +sample_data | 2764889 | Connected to 10.1.0.2 +sample_data | 3450233 | Connected to 10.1.0.3 +sample_data | 5033755 | Connection error +sample_data | 8268153 | Connection error +sample_data_str | 725448 | Connection error +sample_data_str | 1232382 | Disconnected +sample_data_str | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2764889 | Connected to 10.1.0.2 +sample_data_str | 3450233 | Connected to 10.1.0.3 +sample_data_str | 5033755 | Connection error +sample_data_str | 8268153 | Connection error +sample_data_ts_long | 725448 | Connection error +sample_data_ts_long | 1232382 | Disconnected +sample_data_ts_long | 1756467 | Connected to 10.1.0.1 +sample_data_ts_long | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 5033755 | Connection error +sample_data_ts_long | 8268153 | Connection error +; + 
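+// Test that chained conversion functions (TO_STRING over TO_DATETIME and TO_IP) also resolve union-typed fields, producing identical keyword values across the differently-mapped indices.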
+multiIndexIpStringTsLongRenameToString +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data* METADATA _index +| EVAL ts = TO_STRING(TO_DATETIME(@timestamp)), host_ip = TO_STRING(TO_IP(client_ip)) +| KEEP _index, ts, host_ip, event_duration, message +| SORT _index ASC, ts DESC +; + +_index:keyword | ts:keyword | host_ip:keyword | event_duration:long | message:keyword +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndexWhereIpStringTsLong +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data* METADATA _index +| WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) == "172.21.2.162" +| KEEP _index, event_duration, message +| SORT _index ASC, message ASC +; + +_index:keyword | event_duration:long | message:keyword +sample_data | 3450233 | Connected to 10.1.0.3 +sample_data_str | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 3450233 | Connected to 10.1.0.3 +; + +multiIndexWhereIpStringTsLongStats +required_capability: union_types + +FROM sample_data* +| WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) == "172.21.2.162" +| STATS count=count(*) BY message +| SORT count DESC, message ASC +| KEEP count, message +; + +count:long | message:keyword +3 | Connected to 10.1.0.3 +; + +multiIndexWhereIpStringLikeTsLong +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data* METADATA _index +| WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) LIKE "172.21.2.16?" 
+| KEEP _index, event_duration, message +| SORT _index ASC, message ASC +; + +_index:keyword | event_duration:long | message:keyword +sample_data | 3450233 | Connected to 10.1.0.3 +sample_data_str | 3450233 | Connected to 10.1.0.3 +sample_data_ts_long | 3450233 | Connected to 10.1.0.3 +; + +multiIndexWhereIpStringLikeTsLongStats +required_capability: union_types + +FROM sample_data* +| WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) LIKE "172.21.2.16?" +| STATS count=count(*) BY message +| SORT count DESC, message ASC +| KEEP count, message +; + +count:long | message:keyword +3 | Connected to 10.1.0.3 +; + +multiIndexMultiColumnTypesRename +required_capability: union_types +required_capability: metadata_fields + +FROM sample_data* METADATA _index +| WHERE event_duration > 8000000 +| EVAL ts = TO_DATETIME(@timestamp), ts_str = TO_STRING(@timestamp), ts_l = TO_LONG(@timestamp), ip = TO_IP(client_ip), ip_str = TO_STRING(client_ip) +| SORT _index ASC, ts DESC +; + +@timestamp:null | client_ip:null | event_duration:long | message:keyword | _index:keyword | ts:date | ts_str:keyword | ts_l:long | ip:ip | ip_str:k +null | null | 8268153 | Connection error | sample_data | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15 +null | null | 8268153 | Connection error | sample_data_str | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15 +null | null | 8268153 | Connection error | sample_data_ts_long | 2023-10-23T13:52:55.015Z | 1698069175015 | 1698069175015 | 172.21.3.15 | 172.21.3.15 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index e65f574422dd5..654c1ffd8a5e9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -77,6 +77,11 @@ public class EsqlCapabilities { */ public static final String STRING_LITERAL_AUTO_CASTING_TO_DATETIME_ADD_SUB = "string_literal_auto_casting_to_datetime_add_sub"; + /** + * Support multiple field mappings if appropriate conversion function is used (union types) + */ + public static final String UNION_TYPES = "union_types"; + /** * Support for named or positional parameters in EsqlQueryRequest. 
*/ @@ -94,6 +99,7 @@ private static Set capabilities() { caps.add(METADATA_IGNORED_FIELD); caps.add(FN_MV_APPEND); caps.add(REPEAT); + caps.add(UNION_TYPES); caps.add(NAMED_POSITIONAL_PARAMETER); if (Build.current().isSnapshot()) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 70fbe17a7d470..77a51c8415545 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -27,6 +27,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; @@ -59,6 +60,7 @@ import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.DateTimeArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; @@ -80,11 +82,13 @@ import org.elasticsearch.xpack.esql.stats.FeatureMetric; import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; import org.elasticsearch.xpack.esql.type.EsqlDataTypes; +import org.elasticsearch.xpack.esql.type.MultiTypeEsField; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -132,8 +136,13 @@ public class Analyzer extends ParameterizedRuleExecutor("Resolution", new ResolveRefs(), new ImplicitCasting()); - var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit()); + var resolution = new Batch<>( + "Resolution", + new ResolveRefs(), + new ResolveUnionTypes(), // Must be after ResolveRefs, so union types can be found + new ImplicitCasting() + ); + var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnresolveUnionTypes()); rules = List.of(init, resolution, finish); } @@ -851,14 +860,6 @@ private static List potentialCandidatesIfNoMatchesFound( } private static Attribute handleSpecialFields(UnresolvedAttribute u, Attribute named) { - if (named instanceof FieldAttribute fa) { - // incompatible mappings - var field = fa.field(); - if (field instanceof InvalidMappedField imf) { - named = u.withUnresolvedMessage("Cannot use field [" + fa.name() + "] due to ambiguities being " + imf.errorMessage()); - } - } - return named.withLocation(u.source()); } @@ -1061,4 +1062,155 @@ public static Expression castStringLiteral(Expression from, DataType target) { } } } + + /** + * The EsqlIndexResolver will create InvalidMappedField instances for fields that are ambiguous (i.e. have multiple mappings). 
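+ * For example, a field like @timestamp may be mapped as [datetime] in one index and as [long] in another, in which case + * an expression like TO_DATETIME(@timestamp) can disambiguate it, since TO_DATETIME has conversion factories for both mapped types.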
+ * During ResolveRefs we do not convert these to UnresolvedAttribute instances, as we want to first determine if they can + * instead be handled by conversion functions within the query. This rule looks for matching conversion functions and converts + * those fields into MultiTypeEsField, which encapsulates the knowledge of how to convert these into a single type. + * This knowledge will be used later in generating the FieldExtractExec with built-in type conversion. + * Any fields which could not be resolved by conversion functions will be converted to UnresolvedAttribute instances in a later rule + * (See UnresolveUnionTypes below). + */ + private static class ResolveUnionTypes extends BaseAnalyzerRule { + + record TypeResolutionKey(String fieldName, DataType fieldType) {} + + @Override + protected LogicalPlan doRule(LogicalPlan plan) { + List unionFieldAttributes = new ArrayList<>(); + // See if the eval function has an unresolved MultiTypeEsField field + // Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge) + plan = plan.transformExpressionsOnly( + AbstractConvertFunction.class, + convert -> resolveConvertFunction(convert, unionFieldAttributes) + ); + // If no union fields were generated, return the plan as is + if (unionFieldAttributes.isEmpty()) { + return plan; + } + + // Otherwise drop the converted attributes after the alias function, as they are only needed for this function, and + // the original version of the attribute should still be seen as unconverted. + plan = dropConvertedAttributes(plan, unionFieldAttributes); + + // And add generated fields to EsRelation, so these new attributes will appear in the OutputExec of the Fragment + // and thereby get used in FieldExtractExec + plan = plan.transformDown(EsRelation.class, esr -> { + List output = esr.output(); + List missing = new ArrayList<>(); + for (FieldAttribute fa : unionFieldAttributes) { + if (output.stream().noneMatch(a -> a.id().equals(fa.id()))) { + missing.add(fa); + } + } + if (missing.isEmpty() == false) { + output.addAll(missing); + return new EsRelation(esr.source(), esr.index(), output, esr.indexMode(), esr.frozen()); + } + return esr; + }); + return plan; + } + + private LogicalPlan dropConvertedAttributes(LogicalPlan plan, List unionFieldAttributes) { + List projections = new ArrayList<>(plan.output()); + for (var e : unionFieldAttributes) { + projections.removeIf(p -> p.id().equals(e.id())); + } + if (projections.size() != plan.output().size()) { + return new EsqlProject(plan.source(), plan, projections); + } + return plan; + } + + private Expression resolveConvertFunction(AbstractConvertFunction convert, List unionFieldAttributes) { + if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) { + HashMap typeResolutions = new HashMap<>(); + Set supportedTypes = convert.supportedTypes(); + imf.getTypesToIndices().keySet().forEach(typeName -> { + DataType type = DataType.fromTypeName(typeName); + if (supportedTypes.contains(type)) { + TypeResolutionKey key = new TypeResolutionKey(fa.name(), type); + var concreteConvert = typeSpecificConvert(convert, fa.source(), type, imf); + typeResolutions.put(key, concreteConvert); + } + }); + // If all mapped types were resolved, create a new FieldAttribute with the resolved MultiTypeEsField + if (typeResolutions.size() == imf.getTypesToIndices().size()) { + var resolvedField = resolvedMultiTypeEsField(fa, typeResolutions); + return createIfDoesNotAlreadyExist(fa, resolvedField, 
unionFieldAttributes); + } + } else if (convert.field() instanceof AbstractConvertFunction subConvert) { + return convert.replaceChildren(Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes))); + } + return convert; + } + + private Expression createIfDoesNotAlreadyExist( + FieldAttribute fa, + MultiTypeEsField resolvedField, + List unionFieldAttributes + ) { + var unionFieldAttribute = new FieldAttribute(fa.source(), fa.name(), resolvedField); // Generates new ID for the field + int existingIndex = unionFieldAttributes.indexOf(unionFieldAttribute); + if (existingIndex >= 0) { + // Do not generate multiple name/type combinations with different IDs + return unionFieldAttributes.get(existingIndex); + } else { + unionFieldAttributes.add(unionFieldAttribute); + return unionFieldAttribute; + } + } + + private MultiTypeEsField resolvedMultiTypeEsField(FieldAttribute fa, HashMap typeResolutions) { + Map typesToConversionExpressions = new HashMap<>(); + InvalidMappedField imf = (InvalidMappedField) fa.field(); + imf.getTypesToIndices().forEach((typeName, indexNames) -> { + DataType type = DataType.fromTypeName(typeName); + TypeResolutionKey key = new TypeResolutionKey(fa.name(), type); + if (typeResolutions.containsKey(key)) { + typesToConversionExpressions.put(typeName, typeResolutions.get(key)); + } + }); + return MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions); + } + + private Expression typeSpecificConvert(AbstractConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) { + EsField field = new EsField(mtf.getName(), type, mtf.getProperties(), mtf.isAggregatable()); + NameId id = ((FieldAttribute) convert.field()).id(); + FieldAttribute resolvedAttr = new FieldAttribute(source, null, field.getName(), field, null, Nullability.TRUE, id, false); + return convert.replaceChildren(Collections.singletonList(resolvedAttr)); + } + } + + /** + * If there was no AbstractConvertFunction that resolved multi-type fields in the ResolveUnionTypes rules, + * then there could still be some FieldAttributes that contain unresolved MultiTypeEsFields. + * These need to be converted back to actual UnresolvedAttribute in order for validation to generate appropriate failures. 
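+ * For example: "Cannot use field [client_ip] due to ambiguities being mapped as [2] incompatible types: + * [ip] in [events_ip_long], [keyword] in [events_keyword_long]".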
+ */ + private static class UnresolveUnionTypes extends AnalyzerRules.AnalyzerRule { + @Override + protected boolean skipResolved() { + return false; + } + + @Override + protected LogicalPlan rule(LogicalPlan plan) { + if (plan instanceof EsRelation esRelation) { + // Leave esRelation as InvalidMappedField so that UNSUPPORTED fields can still pass through + return esRelation; + } + return plan.transformExpressionsOnly(FieldAttribute.class, UnresolveUnionTypes::checkUnresolved); + } + + private static Attribute checkUnresolved(FieldAttribute fa) { + var field = fa.field(); + if (field instanceof InvalidMappedField imf) { + String unresolvedMessage = "Cannot use field [" + fa.name() + "] due to ambiguities being " + imf.errorMessage(); + return new UnresolvedAttribute(fa.source(), fa.name(), fa.qualifier(), fa.id(), unresolvedMessage, null); + } + return fa; + } + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java index 2496d8b82fa6f..96601905d40c9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java @@ -77,7 +77,11 @@ protected final TypeResolution resolveType() { if (childrenResolved() == false) { return new TypeResolution("Unresolved children"); } - return isType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(factories().keySet())); + return isType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(supportedTypes())); + } + + public Set supportedTypes() { + return factories().keySet(); } public static String supportedTypesNames(Set types) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java index b7e4fc9ae622f..08916c14e91bf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java @@ -92,6 +92,8 @@ public List output() { @Override public boolean expressionsResolved() { + // For unresolved expressions to exist in EsRelation is fine, as long as they are not used in later operations + // This allows for them to be converted to null@unsupported fields in final output, an important feature of ES|QL return true; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 04ed433200c2f..fdba785f668d7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -7,20 +7,28 @@ package org.elasticsearch.xpack.esql.planner; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; 
+import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.breaker.NoopCircuitBreaker; import org.elasticsearch.common.logging.HeaderWarning; +import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.compute.aggregation.GroupingAggregator; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.lucene.LuceneCountOperator; import org.elasticsearch.compute.lucene.LuceneOperator; import org.elasticsearch.compute.lucene.LuceneSourceOperator; import org.elasticsearch.compute.lucene.LuceneTopNSourceOperator; import org.elasticsearch.compute.lucene.TimeSeriesSortedSourceOperatorFactory; import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.compute.operator.Operator; import org.elasticsearch.compute.operator.OrdinalsGroupingOperator; import org.elasticsearch.compute.operator.SourceOperator; @@ -35,13 +43,16 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.search.NestedHelper; +import org.elasticsearch.search.fetch.StoredFieldsSpec; import org.elasticsearch.search.internal.AliasFilter; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.sort.SortAndFormats; import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec.FieldSort; @@ -50,6 +61,7 @@ import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.LocalExecutionPlannerContext; import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.PhysicalOperation; import org.elasticsearch.xpack.esql.type.EsqlDataTypes; +import org.elasticsearch.xpack.esql.type.MultiTypeEsField; import java.io.IOException; import java.util.ArrayList; @@ -102,17 +114,42 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi var docValuesAttrs = fieldExtractExec.docValuesAttributes(); for (Attribute attr : fieldExtractExec.attributesToExtract()) { layout.append(attr); + var unionTypes = findUnionTypes(attr); DataType dataType = attr.dataType(); MappedFieldType.FieldExtractPreference fieldExtractPreference = PlannerUtils.extractPreference(docValuesAttrs.contains(attr)); ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference); String fieldName = attr.name(); boolean isUnsupported = EsqlDataTypes.isUnsupported(dataType); - IntFunction loader = s -> shardContexts.get(s).blockLoader(fieldName, isUnsupported, fieldExtractPreference); + IntFunction loader = s -> getBlockLoaderFor(s, fieldName, isUnsupported, fieldExtractPreference, unionTypes); fields.add(new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, loader)); } return source.with(new ValuesSourceReaderOperator.Factory(fields, readers, docChannel), layout.build()); } + private 
BlockLoader getBlockLoaderFor( + int shardId, + String fieldName, + boolean isUnsupported, + MappedFieldType.FieldExtractPreference fieldExtractPreference, + MultiTypeEsField unionTypes + ) { + DefaultShardContext shardContext = (DefaultShardContext) shardContexts.get(shardId); + BlockLoader blockLoader = shardContext.blockLoader(fieldName, isUnsupported, fieldExtractPreference); + if (unionTypes != null) { + String indexName = shardContext.ctx.index().getName(); + Expression conversion = unionTypes.getConversionExpressionForIndex(indexName); + return new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion); + } + return blockLoader; + } + + private MultiTypeEsField findUnionTypes(Attribute attr) { + if (attr instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField multiTypeEsField) { + return multiTypeEsField; + } + return null; + } + public Function querySupplier(QueryBuilder builder) { QueryBuilder qb = builder == null ? QueryBuilders.matchAllQuery() : builder; return ctx -> shardContexts.get(ctx.index()).toQuery(qb); @@ -321,4 +358,96 @@ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() { return loader; } } + + static class TypeConvertingBlockLoader implements BlockLoader { + protected final BlockLoader delegate; + private final EvalOperator.ExpressionEvaluator convertEvaluator; + + protected TypeConvertingBlockLoader(BlockLoader delegate, AbstractConvertFunction convertFunction) { + this.delegate = delegate; + DriverContext driverContext1 = new DriverContext( + BigArrays.NON_RECYCLING_INSTANCE, + new org.elasticsearch.compute.data.BlockFactory( + new NoopCircuitBreaker(CircuitBreaker.REQUEST), + BigArrays.NON_RECYCLING_INSTANCE + ) + ); + this.convertEvaluator = convertFunction.toEvaluator(e -> driverContext -> new EvalOperator.ExpressionEvaluator() { + @Override + public org.elasticsearch.compute.data.Block eval(Page page) { + // This is a pass-through evaluator, since it sits directly on the source loading (no prior expressions) + return page.getBlock(0); + } + + @Override + public void close() {} + }).get(driverContext1); + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + // Return the delegates builder, which can build the original mapped type, before conversion + return delegate.builder(factory, expectedCount); + } + + @Override + public Block convert(Block block) { + Page page = new Page((org.elasticsearch.compute.data.Block) block); + return convertEvaluator.eval(page); + } + + @Override + public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException { + ColumnAtATimeReader reader = delegate.columnAtATimeReader(context); + if (reader == null) { + return null; + } + return new ColumnAtATimeReader() { + @Override + public Block read(BlockFactory factory, Docs docs) throws IOException { + Block block = reader.read(factory, docs); + Page page = new Page((org.elasticsearch.compute.data.Block) block); + org.elasticsearch.compute.data.Block converted = convertEvaluator.eval(page); + return converted; + } + + @Override + public boolean canReuse(int startingDocID) { + return reader.canReuse(startingDocID); + } + + @Override + public String toString() { + return reader.toString(); + } + }; + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { + // We do no type conversion here, since that will be done in the ValueSourceReaderOperator for row-stride cases + // Using the BlockLoader.convert(Block) function defined above + 
return delegate.rowStrideReader(context); + } + + @Override + public StoredFieldsSpec rowStrideStoredFieldSpec() { + return delegate.rowStrideStoredFieldSpec(); + } + + @Override + public boolean supportsOrdinals() { + return delegate.supportsOrdinals(); + } + + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { + return delegate.ordinals(context); + } + + @Override + public final String toString() { + return "TypeConvertingBlockLoader[delegate=" + delegate + ", convertEvaluator=" + convertEvaluator + "]"; + } + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index d3b2d5c6e7646..fc00f5be22624 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -64,6 +64,7 @@ import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; import org.elasticsearch.xpack.esql.session.IndexResolver; import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; +import org.elasticsearch.xpack.esql.type.MultiTypeEsField; import java.lang.invoke.MethodHandles; import java.util.ArrayList; @@ -188,6 +189,7 @@ public List getNamedWriteables() { entries.add(UnsupportedAttribute.ENTRY); // TODO combine with above once these are in the same project entries.addAll(NamedExpression.getNamedWriteables()); entries.add(UnsupportedAttribute.NAMED_EXPRESSION_ENTRY); // TODO combine with above once these are in the same project + entries.add(MultiTypeEsField.ENTRY); // TODO combine with EsField.getNamedWriteables() once these are in the same module return entries; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index 983a45f36169e..5fd7f0c230463 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -225,26 +225,10 @@ private EsField conflictingTypes(String name, String fullName, FieldCapabilities if (type == UNSUPPORTED) { return unsupported(name, fc); } - typesToIndices.computeIfAbsent(type.esType(), _key -> new TreeSet<>()).add(ir.getIndexName()); + typesToIndices.computeIfAbsent(type.typeName(), _key -> new TreeSet<>()).add(ir.getIndexName()); } } - StringBuilder errorMessage = new StringBuilder(); - errorMessage.append("mapped as ["); - errorMessage.append(typesToIndices.size()); - errorMessage.append("] incompatible types: "); - boolean first = true; - for (Map.Entry> e : typesToIndices.entrySet()) { - if (first) { - first = false; - } else { - errorMessage.append(", "); - } - errorMessage.append("["); - errorMessage.append(e.getKey()); - errorMessage.append("] in "); - errorMessage.append(e.getValue()); - } - return new InvalidMappedField(name, errorMessage.toString()); + return new InvalidMappedField(name, typesToIndices); } private EsField conflictingMetricTypes(String name, String fullName, FieldCapabilitiesResponse fieldCapsResponse) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField.java new file mode 100644 index 0000000000000..2b963e7428e2b --- /dev/null +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField.java @@ -0,0 +1,116 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.type; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * During IndexResolution it could occur that the same field is mapped to different types in different indices. + * The class MultiTypeEfField.UnresolvedField holds that information and allows for later resolution of the field + * to a single type during LogicalPlanOptimization. + * If the plan contains conversion expressions for the different types, the resolution will be done using the conversion expressions, + * in which case a MultiTypeEsField will be created to encapsulate the type resolution capabilities. + * This class can be communicated to the data nodes and used during physical planning to influence field extraction so that + * type conversion is done at the data node level. + */ +public class MultiTypeEsField extends EsField { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + EsField.class, + "MultiTypeEsField", + MultiTypeEsField::new + ); + + private final Map indexToConversionExpressions; + + public MultiTypeEsField(String name, DataType dataType, boolean aggregatable, Map indexToConversionExpressions) { + super(name, dataType, Map.of(), aggregatable); + this.indexToConversionExpressions = indexToConversionExpressions; + } + + public MultiTypeEsField(StreamInput in) throws IOException { + // TODO: Change the conversion expression serialization to i.readNamedWriteable(Expression.class) once Expression is fully supported + this(in.readString(), DataType.readFrom(in), in.readBoolean(), in.readImmutableMap(i -> ((PlanStreamInput) i).readExpression())); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(getName()); + out.writeString(getDataType().typeName()); + out.writeBoolean(isAggregatable()); + out.writeMap(getIndexToConversionExpressions(), (o, v) -> out.writeNamedWriteable(v)); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + public Map getIndexToConversionExpressions() { + return indexToConversionExpressions; + } + + public Expression getConversionExpressionForIndex(String indexName) { + return indexToConversionExpressions.get(indexName); + } + + public static MultiTypeEsField resolveFrom( + InvalidMappedField invalidMappedField, + Map typesToConversionExpressions + ) { + Map> typesToIndices = invalidMappedField.getTypesToIndices(); + DataType resolvedDataType = DataType.UNSUPPORTED; + Map indexToConversionExpressions = new HashMap<>(); + for (String typeName : typesToIndices.keySet()) { + Set indices = 
typesToIndices.get(typeName); + Expression convertExpr = typesToConversionExpressions.get(typeName); + if (resolvedDataType == DataType.UNSUPPORTED) { + resolvedDataType = convertExpr.dataType(); + } else if (resolvedDataType != convertExpr.dataType()) { + throw new IllegalArgumentException("Resolved data type mismatch: " + resolvedDataType + " != " + convertExpr.dataType()); + } + for (String indexName : indices) { + indexToConversionExpressions.put(indexName, convertExpr); + } + } + return new MultiTypeEsField(invalidMappedField.getName(), resolvedDataType, false, indexToConversionExpressions); + } + + @Override + public boolean equals(Object obj) { + if (super.equals(obj) == false) { + return false; + } + if (obj instanceof MultiTypeEsField other) { + return super.equals(other) && indexToConversionExpressions.equals(other.indexToConversionExpressions); + } + return false; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), indexToConversionExpressions); + } + + @Override + public String toString() { + return super.toString() + " (" + indexToConversionExpressions + ")"; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 44466cebb7dac..27aa985efd6d0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -222,6 +222,14 @@ public CsvTests(String fileName, String groupName, String testName, Integer line public final void test() throws Throwable { try { assumeTrue("Test " + testName + " is not enabled", isEnabled(testName, Version.CURRENT)); + /* + * The csv tests support all but a few features. The unsupported features + * are tested in integration tests. + */ + assumeFalse("metadata fields aren't supported", testCase.requiredCapabilities.contains(cap(EsqlFeatures.METADATA_FIELDS))); + assumeFalse("enrich can't load fields in csv tests", testCase.requiredCapabilities.contains(cap(EsqlFeatures.ENRICH_LOAD))); + assumeFalse("can't load metrics in csv tests", testCase.requiredCapabilities.contains(cap(EsqlFeatures.METRICS_SYNTAX))); + assumeFalse("multiple indices aren't supported", testCase.requiredCapabilities.contains(EsqlCapabilities.UNION_TYPES)); if (Build.current().isSnapshot()) { assertThat( @@ -231,14 +239,6 @@ public final void test() throws Throwable { ); } - /* - * The csv tests support all but a few features. The unsupported features - * are tested in integration tests. - */ - assumeFalse("metadata fields aren't supported", testCase.requiredCapabilities.contains(cap(EsqlFeatures.METADATA_FIELDS))); - assumeFalse("enrich can't load fields in csv tests", testCase.requiredCapabilities.contains(cap(EsqlFeatures.ENRICH_LOAD))); - assumeFalse("can't load metrics in csv tests", testCase.requiredCapabilities.contains(cap(EsqlFeatures.METRICS_SYNTAX))); - doTest(); } catch (Throwable th) { throw reworkException(th); @@ -334,7 +334,7 @@ private PhysicalPlan physicalPlan(LogicalPlan parsed, CsvTestsDataLoader.TestsDa private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed) { var preAnalysis = new PreAnalyzer().preAnalyze(parsed); var indices = preAnalysis.indices; - if (indices.size() == 0) { + if (indices.isEmpty()) { /* * If the data set doesn't matter we'll just grab one we know works. * Employees is fine. 
@@ -345,11 +345,23 @@ private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed) } String indexName = indices.get(0).id().index(); - var dataset = CSV_DATASET_MAP.get(indexName); - if (dataset == null) { + List datasets = new ArrayList<>(); + if (indexName.endsWith("*")) { + String indexPrefix = indexName.substring(0, indexName.length() - 1); + for (var entry : CSV_DATASET_MAP.entrySet()) { + if (entry.getKey().startsWith(indexPrefix)) { + datasets.add(entry.getValue()); + } + } + } else { + var dataset = CSV_DATASET_MAP.get(indexName); + datasets.add(dataset); + } + if (datasets.isEmpty()) { throw new IllegalArgumentException("unknown CSV dataset for table [" + indexName + "]"); } - return dataset; + // TODO: Support multiple datasets + return datasets.get(0); } private static TestPhysicalOperationProviders testOperationProviders(CsvTestsDataLoader.TestsDataset dataset) throws Exception { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java new file mode 100644 index 0000000000000..86baee58ca53f --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java @@ -0,0 +1,188 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.type; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.test.AbstractNamedWriteableTestCase; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToBoolean; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianPoint; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianShape; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToVersion; +import org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; +import org.elasticsearch.xpack.esql.session.EsqlConfiguration; +import 
org.elasticsearch.xpack.esql.session.EsqlConfigurationSerializationTests; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isString; + +/** + * This test was originally based on the tests for sub-classes of EsField, like InvalidMappedFieldTests. + * However, it has a few important differences: + *
<ul> + * <li>It is not in the esql.core module, but in the esql module, in order to have access to the sub-classes of AbstractConvertFunction, + * like ToString, which are important conversion Expressions used in the union-types feature.</li> + * <li>It extends AbstractNamedWriteableTestCase instead of AbstractEsFieldTypeTests, + * in order to wrap the StreamInput with a PlanStreamInput, since Expression is not yet fully supported in the new + * serialization approach (NamedWritable).</li> + * </ul>
+ * These differences can be minimized once Expression is fully supported in the new serialization approach, and the esql and esql.core + * modules are merged, or at least the relevant classes are moved. + */ +public class MultiTypeEsFieldTests extends AbstractNamedWriteableTestCase { + + private EsqlConfiguration config; + + @Before + public void initConfig() { + config = EsqlConfigurationSerializationTests.randomConfiguration(); + } + + @Override + protected MultiTypeEsField createTestInstance() { + String name = randomAlphaOfLength(4); + boolean toString = randomBoolean(); + DataType dataType = randomFrom(types()); + DataType toType = toString ? DataType.KEYWORD : dataType; + Map indexToConvertExpressions = randomConvertExpressions(name, toString, dataType); + return new MultiTypeEsField(name, toType, false, indexToConvertExpressions); + } + + @Override + protected MultiTypeEsField mutateInstance(MultiTypeEsField instance) throws IOException { + String name = instance.getName(); + DataType dataType = instance.getDataType(); + Map indexToConvertExpressions = instance.getIndexToConversionExpressions(); + switch (between(0, 2)) { + case 0 -> name = randomAlphaOfLength(name.length() + 1); + case 1 -> dataType = randomValueOtherThan(dataType, () -> randomFrom(DataType.types())); + case 2 -> indexToConvertExpressions = mutateConvertExpressions(name, dataType, indexToConvertExpressions); + default -> throw new IllegalArgumentException(); + } + return new MultiTypeEsField(name, dataType, false, indexToConvertExpressions); + } + + @Override + protected final NamedWriteableRegistry getNamedWriteableRegistry() { + List entries = new ArrayList<>(UnaryScalarFunction.getNamedWriteables()); + entries.addAll(Attribute.getNamedWriteables()); + entries.addAll(EsField.getNamedWriteables()); + entries.add(new NamedWriteableRegistry.Entry(MultiTypeEsField.class, "MultiTypeEsField", MultiTypeEsField::new)); + return new NamedWriteableRegistry(entries); + } + + @Override + protected final Class categoryClass() { + return MultiTypeEsField.class; + } + + @Override + protected final MultiTypeEsField copyInstance(MultiTypeEsField instance, TransportVersion version) throws IOException { + return copyInstance( + instance, + getNamedWriteableRegistry(), + (out, v) -> new PlanStreamOutput(out, new PlanNameRegistry(), config).writeNamedWriteable(v), + in -> { + PlanStreamInput pin = new PlanStreamInput(in, new PlanNameRegistry(), in.namedWriteableRegistry(), config); + return pin.readNamedWriteable(MultiTypeEsField.class); + }, + version + ); + } + + private static Map randomConvertExpressions(String name, boolean toString, DataType dataType) { + Map indexToConvertExpressions = new HashMap<>(); + if (toString) { + indexToConvertExpressions.put(randomAlphaOfLength(4), new ToString(Source.EMPTY, fieldAttribute(name, dataType))); + indexToConvertExpressions.put(randomAlphaOfLength(4), new ToString(Source.EMPTY, fieldAttribute(name, DataType.KEYWORD))); + } else { + indexToConvertExpressions.put(randomAlphaOfLength(4), testConvertExpression(name, DataType.KEYWORD, dataType)); + indexToConvertExpressions.put(randomAlphaOfLength(4), testConvertExpression(name, dataType, dataType)); + } + return indexToConvertExpressions; + } + + private Map mutateConvertExpressions( + String name, + DataType toType, + Map indexToConvertExpressions + ) { + return randomValueOtherThan( + indexToConvertExpressions, + () -> randomConvertExpressions(name, toType == DataType.KEYWORD, randomFrom(types())) + ); + } + + private static List types() { 
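+ // Each of these types has a matching conversion function in testConvertExpression below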
+ return List.of( + DataType.BOOLEAN, + DataType.DATETIME, + DataType.DOUBLE, + DataType.FLOAT, + DataType.INTEGER, + DataType.IP, + DataType.KEYWORD, + DataType.LONG, + DataType.GEO_POINT, + DataType.GEO_SHAPE, + DataType.CARTESIAN_POINT, + DataType.CARTESIAN_SHAPE, + DataType.VERSION + ); + } + + private static Expression testConvertExpression(String name, DataType fromType, DataType toType) { + FieldAttribute fromField = fieldAttribute(name, fromType); + if (isString(toType)) { + return new ToString(Source.EMPTY, fromField); + } else { + return switch (toType) { + case BOOLEAN -> new ToBoolean(Source.EMPTY, fromField); + case DATETIME -> new ToDatetime(Source.EMPTY, fromField); + case DOUBLE, FLOAT -> new ToDouble(Source.EMPTY, fromField); + case INTEGER -> new ToInteger(Source.EMPTY, fromField); + case LONG -> new ToLong(Source.EMPTY, fromField); + case IP -> new ToIP(Source.EMPTY, fromField); + case KEYWORD -> new ToString(Source.EMPTY, fromField); + case GEO_POINT -> new ToGeoPoint(Source.EMPTY, fromField); + case GEO_SHAPE -> new ToGeoShape(Source.EMPTY, fromField); + case CARTESIAN_POINT -> new ToCartesianPoint(Source.EMPTY, fromField); + case CARTESIAN_SHAPE -> new ToCartesianShape(Source.EMPTY, fromField); + case VERSION -> new ToVersion(Source.EMPTY, fromField); + default -> throw new UnsupportedOperationException("Conversion from " + fromType + " to " + toType + " is not supported"); + }; + } + } + + private static FieldAttribute fieldAttribute(String name, DataType dataType) { + return new FieldAttribute(Source.EMPTY, name, new EsField(name, dataType, Map.of(), true)); + } +} diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml new file mode 100644 index 0000000000000..f3403ca8751c0 --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml @@ -0,0 +1,573 @@ +setup: + - requires: + capabilities: + - method: POST + path: /_query + parameters: [method, path, parameters, capabilities] + capabilities: [union_types] + reason: "Union types introduced in 8.15.0" + test_runner_features: [capabilities, allowed_warnings_regex] + + - do: + indices.create: + index: events_ip_long + body: + mappings: + properties: + "@timestamp": + type: date + client_ip: + type: ip + event_duration: + type: long + message: + type: keyword + + - do: + bulk: + refresh: true + index: events_ip_long + body: + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:55:01.543Z", "client_ip": "172.21.3.15", "event_duration": 1756467, "message": "Connected to 10.1.0.1"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:53:55.832Z", "client_ip": "172.21.3.15", "event_duration": 5033755, "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:52:55.015Z", "client_ip": "172.21.3.15", "event_duration": 8268153, "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:51:54.732Z", "client_ip": "172.21.3.15", "event_duration": 725448, "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:33:34.937Z", "client_ip": "172.21.0.5", "event_duration": 1232382, "message": "Disconnected"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T12:27:28.948Z", "client_ip": "172.21.2.113", "event_duration": 2764889, "message": "Connected to 10.1.0.2"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", 
"event_duration": 3450233, "message": "Connected to 10.1.0.3"}' + - do: + indices.create: + index: events_keyword_long + body: + mappings: + properties: + "@timestamp": + type: date + client_ip: + type: keyword + event_duration: + type: long + message: + type: keyword + + - do: + bulk: + refresh: true + index: events_keyword_long + body: + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:55:01.543Z", "client_ip": "172.21.3.15", "event_duration": 1756467, "message": "Connected to 10.1.0.1"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:53:55.832Z", "client_ip": "172.21.3.15", "event_duration": 5033755, "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:52:55.015Z", "client_ip": "172.21.3.15", "event_duration": 8268153, "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:51:54.732Z", "client_ip": "172.21.3.15", "event_duration": 725448, "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:33:34.937Z", "client_ip": "172.21.0.5", "event_duration": 1232382, "message": "Disconnected"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T12:27:28.948Z", "client_ip": "172.21.2.113", "event_duration": 2764889, "message": "Connected to 10.1.0.2"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", "event_duration": 3450233, "message": "Connected to 10.1.0.3"}' + + - do: + indices.create: + index: events_ip_keyword + body: + mappings: + properties: + "@timestamp": + type: date + client_ip: + type: ip + event_duration: + type: keyword + message: + type: keyword + + - do: + bulk: + refresh: true + index: events_ip_keyword + body: + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:55:01.543Z", "client_ip": "172.21.3.15", "event_duration": "1756467", "message": "Connected to 10.1.0.1"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:53:55.832Z", "client_ip": "172.21.3.15", "event_duration": "5033755", "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:52:55.015Z", "client_ip": "172.21.3.15", "event_duration": "8268153", "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:51:54.732Z", "client_ip": "172.21.3.15", "event_duration": "725448", "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:33:34.937Z", "client_ip": "172.21.0.5", "event_duration": "1232382", "message": "Disconnected"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T12:27:28.948Z", "client_ip": "172.21.2.113", "event_duration": "2764889", "message": "Connected to 10.1.0.2"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", "event_duration": "3450233", "message": "Connected to 10.1.0.3"}' + + - do: + indices.create: + index: events_keyword_keyword + body: + mappings: + properties: + "@timestamp": + type: date + client_ip: + type: keyword + event_duration: + type: keyword + message: + type: keyword + + - do: + bulk: + refresh: true + index: events_keyword_keyword + body: + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:55:01.543Z", "client_ip": "172.21.3.15", "event_duration": "1756467", "message": "Connected to 10.1.0.1"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:53:55.832Z", "client_ip": "172.21.3.15", "event_duration": "5033755", "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:52:55.015Z", "client_ip": "172.21.3.15", "event_duration": "8268153", 
"message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:51:54.732Z", "client_ip": "172.21.3.15", "event_duration": "725448", "message": "Connection error"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T13:33:34.937Z", "client_ip": "172.21.0.5", "event_duration": "1232382", "message": "Disconnected"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T12:27:28.948Z", "client_ip": "172.21.2.113", "event_duration": "2764889", "message": "Connected to 10.1.0.2"}' + - '{"index": {}}' + - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", "event_duration": "3450233", "message": "Connected to 10.1.0.3"}' + +--- +load single index ip_long: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_long METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + + - match: { columns.0.name: "_index" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "@timestamp" } + - match: { columns.1.type: "date" } + - match: { columns.2.name: "client_ip" } + - match: { columns.2.type: "ip" } + - match: { columns.3.name: "event_duration" } + - match: { columns.3.type: "long" } + - match: { columns.4.name: "message" } + - match: { columns.4.type: "keyword" } + - length: { values: 7 } + - match: { values.0.0: "events_ip_long" } + - match: { values.0.1: "2023-10-23T13:55:01.543Z" } + - match: { values.0.2: "172.21.3.15" } + - match: { values.0.3: 1756467 } + - match: { values.0.4: "Connected to 10.1.0.1" } + +############################################################################################################ +# Test a single index as a control of the expected results + +--- +load single index keyword_keyword: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_keyword_keyword METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + + - match: { columns.0.name: "_index" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "@timestamp" } + - match: { columns.1.type: "date" } + - match: { columns.2.name: "client_ip" } + - match: { columns.2.type: "keyword" } + - match: { columns.3.name: "event_duration" } + - match: { columns.3.type: "keyword" } + - match: { columns.4.name: "message" } + - match: { columns.4.type: "keyword" } + - length: { values: 7 } + - match: { values.0.0: "events_keyword_keyword" } + - match: { values.0.1: "2023-10-23T13:55:01.543Z" } + - match: { values.0.2: "172.21.3.15" } + - match: { values.0.3: "1756467" } + - match: { values.0.4: "Connected to 10.1.0.1" } + +############################################################################################################ +# Test two indices where the event_duration is mapped as a LONG and as a KEYWORD + +--- +load two indices, showing unsupported type and null value for event_duration: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_* METADATA _index | SORT _index ASC, @timestamp DESC' + + - length: { values: 14 } + + - match: { columns.0.name: "@timestamp" } + - match: { columns.0.type: "date" } + - match: { columns.1.name: "client_ip" } + - match: { columns.1.type: "ip" } + - match: { columns.2.name: "event_duration" } + - match: { columns.2.type: "unsupported" } + - match: { 
columns.3.name: "message" } + - match: { columns.3.type: "keyword" } + - match: { columns.4.name: "_index" } + - match: { columns.4.type: "keyword" } + - length: { values: 14 } + - match: { values.0.0: "2023-10-23T13:55:01.543Z" } + - match: { values.0.1: "172.21.3.15" } + - match: { values.0.2: null } + - match: { values.0.3: "Connected to 10.1.0.1" } + - match: { values.0.4: "events_ip_keyword" } + - match: { values.7.0: "2023-10-23T13:55:01.543Z" } + - match: { values.7.1: "172.21.3.15" } + - match: { values.7.2: null } + - match: { values.7.3: "Connected to 10.1.0.1" } + - match: { values.7.4: "events_ip_long" } + +--- +load two indices with no conversion function, but needs TO_LONG conversion: + - do: + catch: '/Cannot use field \[event_duration\] due to ambiguities being mapped as \[2\] incompatible types: \[keyword\] in \[events_ip_keyword\], \[long\] in \[events_ip_long\]/' + esql.query: + body: + query: 'FROM events_ip_* METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + +--- +load two indices with incorrect conversion function, TO_IP instead of TO_LONG: + - do: + catch: '/Cannot use field \[event_duration\] due to ambiguities being mapped as \[2\] incompatible types: \[keyword\] in \[events_ip_keyword\], \[long\] in \[events_ip_long\]/' + esql.query: + body: + query: 'FROM events_ip_* METADATA _index | EVAL event_duration = TO_IP(event_duration) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + +--- +load two indices with single conversion function TO_LONG: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_* METADATA _index | EVAL event_duration = TO_LONG(event_duration) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + + - match: { columns.0.name: "_index" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "@timestamp" } + - match: { columns.1.type: "date" } + - match: { columns.2.name: "client_ip" } + - match: { columns.2.type: "ip" } + - match: { columns.3.name: "event_duration" } + - match: { columns.3.type: "long" } + - match: { columns.4.name: "message" } + - match: { columns.4.type: "keyword" } + - length: { values: 14 } + - match: { values.0.0: "events_ip_keyword" } + - match: { values.0.1: "2023-10-23T13:55:01.543Z" } + - match: { values.0.2: "172.21.3.15" } + - match: { values.0.3: 1756467 } + - match: { values.0.4: "Connected to 10.1.0.1" } + - match: { values.7.0: "events_ip_long" } + - match: { values.7.1: "2023-10-23T13:55:01.543Z" } + - match: { values.7.2: "172.21.3.15" } + - match: { values.7.3: 1756467 } + - match: { values.7.4: "Connected to 10.1.0.1" } + +--- +load two indices and drop ambiguous field event_duration: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_* | DROP event_duration' + + - length: { values: 14 } + +--- +load two indices, convert and then drop ambiguous field event_duration: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_* | EVAL event_duration = TO_LONG(event_duration) | DROP event_duration' + + - length: { values: 14 } + +--- +load two indices, convert, rename and then drop ambiguous field event_duration: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of 
\\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_* | EVAL x = TO_LONG(event_duration) | DROP event_duration' + + - length: { values: 14 } + +--- +# This test needs to change to produce unsupported/null for the original field name +load two indices, convert, rename but not drop ambiguous field event_duration: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_ip_* | EVAL x = TO_LONG(event_duration), y = TO_STRING(event_duration), z = TO_LONG(event_duration) | SORT @timestamp DESC' + + - match: { columns.0.name: "@timestamp" } + - match: { columns.0.type: "date" } + - match: { columns.1.name: "client_ip" } + - match: { columns.1.type: "ip" } + - match: { columns.2.name: "event_duration" } + - match: { columns.2.type: "unsupported" } + - match: { columns.3.name: "message" } + - match: { columns.3.type: "keyword" } + - match: { columns.4.name: "x" } + - match: { columns.4.type: "long" } + - match: { columns.5.name: "y" } + - match: { columns.5.type: "keyword" } + - match: { columns.6.name: "z" } + - match: { columns.6.type: "long" } + - length: { values: 14 } + - match: { values.0.0: "2023-10-23T13:55:01.543Z" } + - match: { values.0.1: "172.21.3.15" } + - match: { values.0.2: null } + - match: { values.0.3: "Connected to 10.1.0.1" } + - match: { values.0.4: 1756467 } + - match: { values.0.5: "1756467" } + - match: { values.0.6: 1756467 } + - match: { values.1.0: "2023-10-23T13:55:01.543Z" } + - match: { values.1.1: "172.21.3.15" } + - match: { values.1.2: null } + - match: { values.1.3: "Connected to 10.1.0.1" } + - match: { values.1.4: 1756467 } + - match: { values.1.5: "1756467" } + - match: { values.1.6: 1756467 } + +############################################################################################################ +# Test two indices where the IP address is mapped as an IP and as a KEYWORD + +--- +load two indices, showing unsupported type and null value for client_ip: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_*_long METADATA _index | SORT _index ASC, @timestamp DESC' + + - match: { columns.0.name: "@timestamp" } + - match: { columns.0.type: "date" } + - match: { columns.1.name: "client_ip" } + - match: { columns.1.type: "unsupported" } + - match: { columns.2.name: "event_duration" } + - match: { columns.2.type: "long" } + - match: { columns.3.name: "message" } + - match: { columns.3.type: "keyword" } + - match: { columns.4.name: "_index" } + - match: { columns.4.type: "keyword" } + - length: { values: 14 } + - match: { values.0.0: "2023-10-23T13:55:01.543Z" } + - match: { values.0.1: null } + - match: { values.0.2: 1756467 } + - match: { values.0.3: "Connected to 10.1.0.1" } + - match: { values.0.4: "events_ip_long" } + - match: { values.7.0: "2023-10-23T13:55:01.543Z" } + - match: { values.7.1: null } + - match: { values.7.2: 1756467 } + - match: { values.7.3: "Connected to 10.1.0.1" } + - match: { values.7.4: "events_keyword_long" } + +--- +load two indices with no conversion function, but needs TO_IP conversion: + - do: + catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in \[events_ip_long\], \[keyword\] in \[events_keyword_long\]/' + esql.query: + body: + query: 'FROM events_*_long METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + +--- +load two indices with incorrect 
+
+############################################################################################################
+# Test two indices where the IP address is mapped as an IP and as a KEYWORD
+
+---
+load two indices, showing unsupported type and null value for client_ip:
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM events_*_long METADATA _index | SORT _index ASC, @timestamp DESC'
+
+  - match: { columns.0.name: "@timestamp" }
+  - match: { columns.0.type: "date" }
+  - match: { columns.1.name: "client_ip" }
+  - match: { columns.1.type: "unsupported" }
+  - match: { columns.2.name: "event_duration" }
+  - match: { columns.2.type: "long" }
+  - match: { columns.3.name: "message" }
+  - match: { columns.3.type: "keyword" }
+  - match: { columns.4.name: "_index" }
+  - match: { columns.4.type: "keyword" }
+  - length: { values: 14 }
+  - match: { values.0.0: "2023-10-23T13:55:01.543Z" }
+  - match: { values.0.1: null }
+  - match: { values.0.2: 1756467 }
+  - match: { values.0.3: "Connected to 10.1.0.1" }
+  - match: { values.0.4: "events_ip_long" }
+  - match: { values.7.0: "2023-10-23T13:55:01.543Z" }
+  - match: { values.7.1: null }
+  - match: { values.7.2: 1756467 }
+  - match: { values.7.3: "Connected to 10.1.0.1" }
+  - match: { values.7.4: "events_keyword_long" }
+
+---
+load two indices with no conversion function, but needs TO_IP conversion:
+  - do:
+      catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in \[events_ip_long\], \[keyword\] in \[events_keyword_long\]/'
+      esql.query:
+        body:
+          query: 'FROM events_*_long METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+---
+load two indices with incorrect conversion function, TO_LONG instead of TO_IP:
+  - do:
+      catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in \[events_ip_long\], \[keyword\] in \[events_keyword_long\]/'
+      esql.query:
+        body:
+          query: 'FROM events_*_long METADATA _index | EVAL client_ip = TO_LONG(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+---
+load two indices with single conversion function TO_IP:
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM events_*_long METADATA _index | EVAL client_ip = TO_IP(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+  - match: { columns.0.name: "_index" }
+  - match: { columns.0.type: "keyword" }
+  - match: { columns.1.name: "@timestamp" }
+  - match: { columns.1.type: "date" }
+  - match: { columns.2.name: "client_ip" }
+  - match: { columns.2.type: "ip" }
+  - match: { columns.3.name: "event_duration" }
+  - match: { columns.3.type: "long" }
+  - match: { columns.4.name: "message" }
+  - match: { columns.4.type: "keyword" }
+  - length: { values: 14 }
+  - match: { values.0.0: "events_ip_long" }
+  - match: { values.0.1: "2023-10-23T13:55:01.543Z" }
+  - match: { values.0.2: "172.21.3.15" }
+  - match: { values.0.3: 1756467 }
+  - match: { values.0.4: "Connected to 10.1.0.1" }
+  - match: { values.7.0: "events_keyword_long" }
+  - match: { values.7.1: "2023-10-23T13:55:01.543Z" }
+  - match: { values.7.2: "172.21.3.15" }
+  - match: { values.7.3: 1756467 }
+  - match: { values.7.4: "Connected to 10.1.0.1" }
+
+---
+load two indices and drop ambiguous field client_ip:
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM events_*_long | DROP client_ip'
+
+  - length: { values: 14 }
+
+---
+load two indices, convert and then drop ambiguous field client_ip:
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM events_*_long | EVAL client_ip = TO_IP(client_ip) | DROP client_ip'
+
+  - length: { values: 14 }
+
+---
+load two indices, convert, rename and then drop ambiguous field client_ip:
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM events_*_long | EVAL x = TO_IP(client_ip) | DROP client_ip'
+
+  - length: { values: 14 }
+
+---
+# This test needs to change to produce unsupported/null for the original field name
+load two indices, convert, rename but not drop ambiguous field client_ip:
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM events_*_long | EVAL x = TO_IP(client_ip), y = TO_STRING(client_ip), z = TO_IP(client_ip) | SORT @timestamp DESC'
+
+  - match: { columns.0.name: "@timestamp" }
+  - match: { columns.0.type: "date" }
+  - match: { columns.1.name: "client_ip" }
+  - match: { columns.1.type: "unsupported" }
+  - match: { columns.2.name: "event_duration" }
+  - match: { columns.2.type: "long" }
+  - match: { columns.3.name: "message" }
+  - match: { columns.3.type: "keyword" }
+  - match: { columns.4.name: "x" }
+  - match: { columns.4.type: "ip" }
+  - match: { columns.5.name: "y" }
+  - match: { columns.5.type: "keyword" }
+  - match: { columns.6.name: "z" }
+  - match: { columns.6.type: "ip" }
+  - length: { values: 14 }
+  - match: { values.0.0: "2023-10-23T13:55:01.543Z" }
+  - match: { values.0.1: null }
+  - match: { values.0.2: 1756467 }
+  - match: { values.0.3: "Connected to 10.1.0.1" }
+  - match: { values.0.4: "172.21.3.15" }
+  - match: { values.0.5: "172.21.3.15" }
+  - match: { values.0.6: "172.21.3.15" }
+  - match: { values.1.0: "2023-10-23T13:55:01.543Z" }
+  - match: { values.1.1: null }
+  - match: { values.1.2: 1756467 }
+  - match: { values.1.3: "Connected to 10.1.0.1" }
+  - match: { values.1.4: "172.21.3.15" }
+  - match: { values.1.5: "172.21.3.15" }
+  - match: { values.1.6: "172.21.3.15" }
"2023-10-23T13:55:01.543Z" } + - match: { values.0.1: null } + - match: { values.0.2: 1756467 } + - match: { values.0.3: "Connected to 10.1.0.1" } + - match: { values.0.4: "172.21.3.15" } + - match: { values.0.5: "172.21.3.15" } + - match: { values.0.6: "172.21.3.15" } + - match: { values.1.0: "2023-10-23T13:55:01.543Z" } + - match: { values.1.1: null } + - match: { values.1.2: 1756467 } + - match: { values.1.3: "Connected to 10.1.0.1" } + - match: { values.1.4: "172.21.3.15" } + - match: { values.1.5: "172.21.3.15" } + - match: { values.1.6: "172.21.3.15" } + +############################################################################################################ +# Test four indices with both the client_IP (IP and KEYWORD) and event_duration (LONG and KEYWORD) mappings + +--- +load four indices with single conversion function TO_LONG: + - do: + catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in \[events_ip_keyword, events_ip_long\], \[keyword\] in \[events_keyword_keyword, events_keyword_long\]/' + esql.query: + body: + query: 'FROM events_* METADATA _index | EVAL event_duration = TO_LONG(event_duration) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + +--- +load four indices with single conversion function TO_IP: + - do: + catch: '/Cannot use field \[event_duration\] due to ambiguities being mapped as \[2\] incompatible types: \[keyword\] in \[events_ip_keyword, events_keyword_keyword\], \[long\] in \[events_ip_long, events_keyword_long\]/' + esql.query: + body: + query: 'FROM events_* METADATA _index | EVAL client_ip = TO_IP(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + +--- +load four indices with multiple conversion functions TO_LONG and TO_IP: + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM events_* METADATA _index | EVAL event_duration = TO_LONG(event_duration), client_ip = TO_IP(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC' + + - match: { columns.0.name: "_index" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "@timestamp" } + - match: { columns.1.type: "date" } + - match: { columns.2.name: "client_ip" } + - match: { columns.2.type: "ip" } + - match: { columns.3.name: "event_duration" } + - match: { columns.3.type: "long" } + - match: { columns.4.name: "message" } + - match: { columns.4.type: "keyword" } + - length: { values: 28 } + - match: { values.0.0: "events_ip_keyword" } + - match: { values.0.1: "2023-10-23T13:55:01.543Z" } + - match: { values.0.2: "172.21.3.15" } + - match: { values.0.3: 1756467 } + - match: { values.0.4: "Connected to 10.1.0.1" } + - match: { values.7.0: "events_ip_long" } + - match: { values.7.1: "2023-10-23T13:55:01.543Z" } + - match: { values.7.2: "172.21.3.15" } + - match: { values.7.3: 1756467 } + - match: { values.7.4: "Connected to 10.1.0.1" } + - match: { values.14.0: "events_keyword_keyword" } + - match: { values.14.1: "2023-10-23T13:55:01.543Z" } + - match: { values.14.2: "172.21.3.15" } + - match: { values.14.3: 1756467 } + - match: { values.14.4: "Connected to 10.1.0.1" } + - match: { values.21.0: "events_keyword_long" } + - match: { values.21.1: "2023-10-23T13:55:01.543Z" } + - match: { values.21.2: "172.21.3.15" } + - match: { values.21.3: 1756467 } + - match: { values.21.4: "Connected to 10.1.0.1" } 
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml
new file mode 100644
index 0000000000000..99bd1d6508895
--- /dev/null
+++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml
@@ -0,0 +1,203 @@
+setup:
+  - requires:
+      capabilities:
+        - method: POST
+          path: /_query
+          parameters: [ method, path, parameters, capabilities ]
+          capabilities: [ union_types ]
+      reason: "Union types introduced in 8.15.0"
+      test_runner_features: [ capabilities, allowed_warnings_regex ]
+
+  - do:
+      indices.create:
+        index: test1
+        body:
+          mappings:
+            properties:
+              obj:
+                properties:
+                  keyword:
+                    type: keyword
+                  integer:
+                    type: integer
+              keyword:
+                type: boolean
+              integer:
+                type: version
+
+  - do:
+      indices.create:
+        index: test2
+        body:
+          mappings:
+            properties:
+              obj:
+                properties:
+                  keyword:
+                    type: boolean
+                  integer:
+                    type: version
+              keyword:
+                type: keyword
+              integer:
+                type: integer
+
+  - do:
+      bulk:
+        refresh: true
+        index: test1
+        body:
+          - '{ "index": {"_id": 11} }'
+          - '{ "obj.keyword": "true", "obj.integer": 100, "keyword": "true", "integer": "50" }'
+          - '{ "index": {"_id": 12} }'
+          - '{ "obj.keyword": "US", "obj.integer": 20, "keyword": false, "integer": "1.2.3" }'
+
+  - do:
+      bulk:
+        refresh: true
+        index: test2
+        body:
+          - '{ "index": {"_id": 21} }'
+          - '{ "obj.keyword": "true", "obj.integer": "50", "keyword": "true", "integer": 100 }'
+          - '{ "index": {"_id": 22} }'
+          - '{ "obj.keyword": false, "obj.integer": "1.2.3", "keyword": "US", "integer": 20 }'
+
+---
+"load single index":
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM test1 METADATA _id | KEEP _id, obj.integer, obj.keyword | SORT _id ASC'
+
+  - match: { columns.0.name: "_id" }
+  - match: { columns.0.type: "keyword" }
+  - match: { columns.1.name: "obj.integer" }
+  - match: { columns.1.type: "integer" }
+  - match: { columns.2.name: "obj.keyword" }
+  - match: { columns.2.type: "keyword" }
+  - length: { values: 2 }
+  - match: { values.0.0: "11" }
+  - match: { values.0.1: 100 }
+  - match: { values.0.2: "true" }
+  - match: { values.1.0: "12" }
+  - match: { values.1.1: 20 }
+  - match: { values.1.2: "US" }
+
+---
+"load two indices with to_string":
+  - do:
+      allowed_warnings_regex:
+        - "No limit defined, adding default limit of \\[.*\\]"
+      esql.query:
+        body:
+          query: 'FROM test* METADATA _id | EVAL s = TO_STRING(obj.keyword) | KEEP _id, s | SORT _id ASC'
+
+  - match: { columns.0.name: "_id" }
+  - match: { columns.0.type: "keyword" }
+  - match: { columns.1.name: "s" }
+  - match: { columns.1.type: "keyword" }
+  - length: { values: 4 }
+  - match: { values.0.0: "11" }
+  - match: { values.0.1: "true" }
+  - match: { values.1.0: "12" }
+  - match: { values.1.1: "US" }
+  - match: { values.2.0: "21" }
+  - match: { values.2.1: "true" }
+  - match: { values.3.0: "22" }
+  - match: { values.3.1: "false" }
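+
+# Note: the remaining tests resolve the integer/version union by converting through TO_STRING first,
+# as in TO_VERSION(TO_STRING(obj.integer)), presumably because TO_VERSION alone does not accept the
+# integer-mapped side of the union.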
{ values.0.1: "100" } + - match: { values.1.0: "12" } + - match: { values.1.1: "20" } + - match: { values.2.0: "21" } + - match: { values.2.1: "50" } + - match: { values.3.0: "22" } + - match: { values.3.1: "1.2.3" } + +--- +"load two indices with to_version and to_string": + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM test* METADATA _id | EVAL v = TO_VERSION(TO_STRING(obj.integer)), s = TO_STRING(obj.keyword) | KEEP _id, v, s | SORT _id ASC' + + - match: { columns.0.name: "_id" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "v" } + - match: { columns.1.type: "version" } + - match: { columns.2.name: "s" } + - match: { columns.2.type: "keyword" } + - length: { values: 4 } + - match: { values.0.0: "11" } + - match: { values.0.1: "100" } + - match: { values.0.2: "true" } + - match: { values.1.0: "12" } + - match: { values.1.1: "20" } + - match: { values.1.2: "US" } + - match: { values.2.0: "21" } + - match: { values.2.1: "50" } + - match: { values.2.2: "true" } + - match: { values.3.0: "22" } + - match: { values.3.1: "1.2.3" } + - match: { values.3.2: "false" } + +--- +"load two indices with to_version and to_string nested and un-nested": + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM test* METADATA _id | EVAL nv = TO_VERSION(TO_STRING(obj.integer)), uv = TO_VERSION(TO_STRING(integer)), ns = TO_STRING(obj.keyword), us = TO_STRING(keyword) | KEEP _id, nv, uv, ns, us | SORT _id ASC' + + - match: { columns.0.name: "_id" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "nv" } + - match: { columns.1.type: "version" } + - match: { columns.2.name: "uv" } + - match: { columns.2.type: "version" } + - match: { columns.3.name: "ns" } + - match: { columns.3.type: "keyword" } + - match: { columns.4.name: "us" } + - match: { columns.4.type: "keyword" } + - length: { values: 4 } + - match: { values.0.0: "11" } + - match: { values.0.1: "100" } + - match: { values.0.2: "50" } + - match: { values.0.3: "true" } + - match: { values.0.4: "true" } + - match: { values.1.0: "12" } + - match: { values.1.1: "20" } + - match: { values.1.2: "1.2.3" } + - match: { values.1.3: "US" } + - match: { values.1.4: "false" } + - match: { values.2.0: "21" } + - match: { values.2.1: "50" } + - match: { values.2.2: "100" } + - match: { values.2.3: "true" } + - match: { values.2.4: "true" } + - match: { values.3.0: "22" } + - match: { values.3.1: "1.2.3" } + - match: { values.3.2: "20" } + - match: { values.3.3: "false" } + - match: { values.3.4: "US" }