diff --git a/docs/changelog/107545.yaml b/docs/changelog/107545.yaml
new file mode 100644
index 0000000000000..ad457cc5a533f
--- /dev/null
+++ b/docs/changelog/107545.yaml
@@ -0,0 +1,6 @@
+pr: 107545
+summary: "ESQL: Union Types Support"
+area: ES|QL
+type: enhancement
+issues:
+ - 100603
diff --git a/muted-tests.yml b/muted-tests.yml
index ef3c8188498a9..aef4b526e8b52 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -103,3 +103,10 @@ tests:
# - class: org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIPTests
# method: testCrankyEvaluateBlockWithoutNulls
# issue: https://github.com/elastic/elasticsearch/...
+#
+# Mute a single test in an ES|QL csv-spec test file:
+# - class: "org.elasticsearch.xpack.esql.CsvTests"
+# method: "test {union_types.MultiIndexIpStringStatsInline}"
+# issue: "https://github.com/elastic/elasticsearch/..."
+# Note that this mutes the test for the unit-test-like CsvTests only.
+# Muting for the integration tests needs to be done for each IT class individually.
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java
index a91f005d6d5ab..42feda3e9dd48 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java
@@ -92,6 +92,16 @@ interface StoredFields {
*/
SortedSetDocValues ordinals(LeafReaderContext context) throws IOException;
+ /**
+ * In support of 'union types', Blocks loaded from source sometimes need to be converted
+ * immediately after loading. Typically this is a type conversion or an encoding conversion.
+ * @param block original block loaded from source
+ * @return converted block (or original if no conversion required)
+ */
+ default Block convert(Block block) {
+ return block;
+ }
+
/**
* Load blocks with only null.
*/
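
To make the new hook concrete, here is a minimal, self-contained sketch of its contract. `Block`, `BlockLoader`, and `StringToLongLoader` below are simplified stand-ins invented for illustration, not the real `org.elasticsearch` types: the loader first builds a block in the mapped type, and `convert` then gets a chance to rewrite it into the desired type.

```java
import java.util.List;

// Simplified stand-ins (illustration only, not the real interfaces).
interface Block {}

record StringBlock(List<String> values) implements Block {}

record LongBlock(List<Long> values) implements Block {}

interface BlockLoader {
    // Load values in the mapped type, e.g. keyword.
    Block load();

    // Default: no conversion required, return the original block.
    default Block convert(Block block) {
        return block;
    }
}

// Hypothetical loader for a field mapped as keyword but requested as long,
// like the "str_long" fields exercised by the tests further down in this PR.
class StringToLongLoader implements BlockLoader {
    private final List<String> stored;

    StringToLongLoader(List<String> stored) {
        this.stored = stored;
    }

    @Override
    public Block load() {
        return new StringBlock(stored);
    }

    @Override
    public Block convert(Block block) {
        List<Long> longs = ((StringBlock) block).values().stream().map(Long::parseLong).toList();
        return new LongBlock(longs);
    }
}
```
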
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java
index a6e713007a97f..0f7d92564c8ab 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/FieldAttribute.java
@@ -168,12 +168,14 @@ protected Attribute clone(
@Override
public int hashCode() {
- return Objects.hash(super.hashCode(), path);
+ return Objects.hash(super.hashCode(), path, field);
}
@Override
public boolean equals(Object obj) {
- return super.equals(obj) && Objects.equals(path, ((FieldAttribute) obj).path);
+ return super.equals(obj)
+ && Objects.equals(path, ((FieldAttribute) obj).path)
+ && Objects.equals(field, ((FieldAttribute) obj).field);
}
@Override
diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java
index fd7bfbec4730f..9b088cfb19f6c 100644
--- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java
+++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/InvalidMappedField.java
@@ -15,11 +15,15 @@
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
+import java.util.Set;
import java.util.TreeMap;
/**
* Representation of field mapped differently across indices.
* Used during mapping discovery only.
+ * Note that the typesToIndices field is not serialized, because that information is not
+ * needed across the cluster; it only survives through the Analyzer phase of query planning.
+ * It is used specifically for the 'union types' feature in ES|QL.
*/
public class InvalidMappedField extends EsField {
static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
@@ -29,10 +33,10 @@ public class InvalidMappedField extends EsField {
);
private final String errorMessage;
+ private final Map<String, Set<String>> typesToIndices;
public InvalidMappedField(String name, String errorMessage, Map<String, EsField> properties) {
- super(name, DataType.UNSUPPORTED, properties, false);
- this.errorMessage = errorMessage;
+ this(name, errorMessage, properties, Map.of());
}
public InvalidMappedField(String name, String errorMessage) {
@@ -43,6 +47,19 @@ public InvalidMappedField(String name) {
this(name, StringUtils.EMPTY, new TreeMap<>());
}
+ /**
+ * Constructor supporting union types, used in ES|QL.
+ */
+ public InvalidMappedField(String name, Map<String, Set<String>> typesToIndices) {
+ this(name, makeErrorMessage(typesToIndices), new TreeMap<>(), typesToIndices);
+ }
+
+ private InvalidMappedField(String name, String errorMessage, Map<String, EsField> properties, Map<String, Set<String>> typesToIndices) {
+ super(name, DataType.UNSUPPORTED, properties, false);
+ this.errorMessage = errorMessage;
+ this.typesToIndices = typesToIndices;
+ }
+
private InvalidMappedField(StreamInput in) throws IOException {
this(in.readString(), in.readString(), in.readImmutableMap(StreamInput::readString, i -> i.readNamedWriteable(EsField.class)));
}
@@ -88,4 +105,28 @@ public EsField getExactField() {
public Exact getExactInfo() {
return new Exact(false, "Field [" + getName() + "] is invalid, cannot access it");
}
+
+ public Map<String, Set<String>> getTypesToIndices() {
+ return typesToIndices;
+ }
+
+ private static String makeErrorMessage(Map<String, Set<String>> typesToIndices) {
+ StringBuilder errorMessage = new StringBuilder();
+ errorMessage.append("mapped as [");
+ errorMessage.append(typesToIndices.size());
+ errorMessage.append("] incompatible types: ");
+ boolean first = true;
+ for (Map.Entry<String, Set<String>> e : typesToIndices.entrySet()) {
+ if (first) {
+ first = false;
+ } else {
+ errorMessage.append(", ");
+ }
+ errorMessage.append("[");
+ errorMessage.append(e.getKey());
+ errorMessage.append("] in ");
+ errorMessage.append(e.getValue());
+ }
+ return errorMessage.toString();
+ }
}
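
For illustration, this fragment shows the message `makeErrorMessage` builds for a field mapped as `ip` in one index and `keyword` in another (it assumes the class's existing `errorMessage()` accessor; a `TreeMap` keeps the type keys in deterministic order):

```java
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

Map<String, Set<String>> typesToIndices = new TreeMap<>(
    Map.of("ip", Set.of("index1"), "keyword", Set.of("index2"))
);
InvalidMappedField field = new InvalidMappedField("client_ip", typesToIndices);

// field.errorMessage() now reads:
// mapped as [2] incompatible types: [ip] in [index1], [keyword] in [index2]
```
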
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java
index 4d41ab27312c3..2e46735bd5bd1 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/Page.java
@@ -83,7 +83,9 @@ private Page(boolean copyBlocks, int positionCount, Block[] blocks) {
private Page(Page prev, Block[] toAdd) {
for (Block block : toAdd) {
if (prev.positionCount != block.getPositionCount()) {
- throw new IllegalArgumentException("Block [" + block + "] does not have same position count");
+ throw new IllegalArgumentException(
+ "Block [" + block + "] does not have same position count: " + block.getPositionCount() + " != " + prev.positionCount
+ );
}
}
this.positionCount = prev.positionCount;
diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java
index 06b1375ac057e..ee747d98c26f8 100644
--- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java
+++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java
@@ -165,6 +165,7 @@ public int get(int i) {
}
}
success = true;
+ return page.appendBlocks(blocks);
} catch (IOException e) {
throw new UncheckedIOException(e);
} finally {
@@ -172,7 +173,6 @@ public int get(int i) {
Releasables.closeExpectNoException(blocks);
}
}
- return page.appendBlocks(blocks);
}
private void positionFieldWork(int shard, int segment, int firstDoc) {
@@ -233,6 +233,7 @@ private void loadFromSingleLeaf(Block[] blocks, int shard, int segment, BlockLoa
new RowStrideReaderWork(
field.rowStride(ctx),
(Block.Builder) field.loader.builder(loaderBlockFactory, docs.count()),
+ field.loader,
f
)
);
@@ -262,17 +263,13 @@ private void loadFromSingleLeaf(Block[] blocks, int shard, int segment, BlockLoa
);
for (int p = 0; p < docs.count(); p++) {
int doc = docs.get(p);
- if (storedFields != null) {
- storedFields.advanceTo(doc);
- }
- for (int r = 0; r < rowStrideReaders.size(); r++) {
- RowStrideReaderWork work = rowStrideReaders.get(r);
- work.reader.read(doc, storedFields, work.builder);
+ storedFields.advanceTo(doc);
+ for (RowStrideReaderWork work : rowStrideReaders) {
+ work.read(doc, storedFields);
}
}
- for (int r = 0; r < rowStrideReaders.size(); r++) {
- RowStrideReaderWork work = rowStrideReaders.get(r);
- blocks[work.offset] = work.builder.build();
+ for (RowStrideReaderWork work : rowStrideReaders) {
+ blocks[work.offset] = work.build();
}
} finally {
Releasables.close(rowStrideReaders);
@@ -310,7 +307,9 @@ private class LoadFromMany implements Releasable {
private final IntVector docs;
private final int[] forwards;
private final int[] backwards;
- private final Block.Builder[] builders;
+ private final Block.Builder[][] builders;
+ private final BlockLoader[][] converters;
+ private final Block.Builder[] fieldTypeBuilders;
private final BlockLoader.RowStrideReader[] rowStride;
BlockLoaderStoredFieldsFromLeafLoader storedFields;
@@ -322,21 +321,25 @@ private class LoadFromMany implements Releasable {
docs = docVector.docs();
forwards = docVector.shardSegmentDocMapForwards();
backwards = docVector.shardSegmentDocMapBackwards();
- builders = new Block.Builder[target.length];
+ fieldTypeBuilders = new Block.Builder[target.length];
+ builders = new Block.Builder[target.length][shardContexts.size()];
+ converters = new BlockLoader[target.length][shardContexts.size()];
rowStride = new BlockLoader.RowStrideReader[target.length];
}
void run() throws IOException {
for (int f = 0; f < fields.length; f++) {
/*
- * Important note: each block loader has a method to build an
- * optimized block loader, but we have *many* fields and some
- * of those block loaders may not be compatible with each other.
- * So! We take the least common denominator which is the loader
- * from the element expected element type.
+ * Important note: each field has a desired type, which might not match the mapped type (in the case of union types).
+ * We create the final block builders using the desired type, one for each field. We also use inner builders
+ * (one per field and shard) and converters (again one per field and shard) to perform the field loading in a way
+ * that is correct for the mapped field type, and then convert between that type and the desired type.
*/
- builders[f] = fields[f].info.type.newBlockBuilder(docs.getPositionCount(), blockFactory);
+ fieldTypeBuilders[f] = fields[f].info.type.newBlockBuilder(docs.getPositionCount(), blockFactory);
+ builders[f] = new Block.Builder[shardContexts.size()];
+ converters[f] = new BlockLoader[shardContexts.size()];
}
+ ComputeBlockLoaderFactory loaderBlockFactory = new ComputeBlockLoaderFactory(blockFactory, docs.getPositionCount());
int p = forwards[0];
int shard = shards.getInt(p);
int segment = segments.getInt(p);
@@ -344,7 +347,8 @@ void run() throws IOException {
positionFieldWork(shard, segment, firstDoc);
LeafReaderContext ctx = ctx(shard, segment);
fieldsMoved(ctx, shard);
- read(firstDoc);
+ verifyBuilders(loaderBlockFactory, shard);
+ read(firstDoc, shard);
for (int i = 1; i < forwards.length; i++) {
p = forwards[i];
shard = shards.getInt(p);
@@ -354,11 +358,19 @@ void run() throws IOException {
ctx = ctx(shard, segment);
fieldsMoved(ctx, shard);
}
- read(docs.getInt(p));
+ verifyBuilders(loaderBlockFactory, shard);
+ read(docs.getInt(p), shard);
}
- for (int f = 0; f < builders.length; f++) {
- try (Block orig = builders[f].build()) {
- target[f] = orig.filter(backwards);
+ for (int f = 0; f < target.length; f++) {
+ for (int s = 0; s < shardContexts.size(); s++) {
+ if (builders[f][s] != null) {
+ try (Block orig = (Block) converters[f][s].convert(builders[f][s].build())) {
+ fieldTypeBuilders[f].copyFrom(orig, 0, orig.getPositionCount());
+ }
+ }
+ }
+ try (Block targetBlock = fieldTypeBuilders[f].build()) {
+ target[f] = targetBlock.filter(backwards);
}
}
}
@@ -379,16 +391,29 @@ private void fieldsMoved(LeafReaderContext ctx, int shard) throws IOException {
}
}
- private void read(int doc) throws IOException {
+ private void verifyBuilders(ComputeBlockLoaderFactory loaderBlockFactory, int shard) {
+ for (int f = 0; f < fields.length; f++) {
+ if (builders[f][shard] == null) {
+ // Note that this relies on field.newShard() to set the loader and converter correctly for the current shard
+ builders[f][shard] = (Block.Builder) fields[f].loader.builder(loaderBlockFactory, docs.getPositionCount());
+ converters[f][shard] = fields[f].loader;
+ }
+ }
+ }
+
+ private void read(int doc, int shard) throws IOException {
storedFields.advanceTo(doc);
for (int f = 0; f < builders.length; f++) {
- rowStride[f].read(doc, storedFields, builders[f]);
+ rowStride[f].read(doc, storedFields, builders[f][shard]);
}
}
@Override
public void close() {
- Releasables.closeExpectNoException(builders);
+ Releasables.closeExpectNoException(fieldTypeBuilders);
+ for (int f = 0; f < fields.length; f++) {
+ Releasables.closeExpectNoException(builders[f]);
+ }
}
}
@@ -468,7 +493,17 @@ private void trackReader(String type, BlockLoader.Reader reader) {
}
}
- private record RowStrideReaderWork(BlockLoader.RowStrideReader reader, Block.Builder builder, int offset) implements Releasable {
+ private record RowStrideReaderWork(BlockLoader.RowStrideReader reader, Block.Builder builder, BlockLoader loader, int offset)
+ implements
+ Releasable {
+ void read(int doc, BlockLoaderStoredFieldsFromLeafLoader storedFields) throws IOException {
+ reader.read(doc, storedFields, builder);
+ }
+
+ Block build() {
+ return (Block) loader.convert(builder.build());
+ }
+
@Override
public void close() {
builder.close();
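
To summarize the new `LoadFromMany` bookkeeping, here is a standalone sketch of the merge step with plain lists standing in for blocks and builders; all names are illustrative, not the real compute classes. Each shard's block is built in that shard's mapped type, converted to the field's desired type, and copied into the single desired-type builder, mirroring `fieldTypeBuilders` above.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

public class UnionTypeMergeSketch {
    public static void main(String[] args) {
        // Shard 0 maps "duration" as long, shard 1 maps it as keyword.
        List<Long> shard0Block = List.of(1L, 2L);      // already the desired type
        List<String> shard1Block = List.of("3", "4");  // needs conversion

        Function<List<Long>, List<Long>> convert0 = b -> b;  // no-op converter
        Function<List<String>, List<Long>> convert1 =        // keyword -> long
            b -> b.stream().map(Long::parseLong).toList();

        // One final builder per field, in the desired type.
        List<Long> fieldTypeBuilder = new ArrayList<>();
        fieldTypeBuilder.addAll(convert0.apply(shard0Block));
        fieldTypeBuilder.addAll(convert1.apply(shard1Block));

        System.out.println(fieldTypeBuilder); // [1, 2, 3, 4]
    }
}
```
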
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java
new file mode 100644
index 0000000000000..66bcf2a57e393
--- /dev/null
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java
@@ -0,0 +1,2020 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.lucene;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.action.ActionListener;
+import org.elasticsearch.action.support.PlainActionFuture;
+import org.elasticsearch.common.Randomness;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.breaker.NoopCircuitBreaker;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.collect.Iterators;
+import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.common.util.concurrent.EsExecutors;
+import org.elasticsearch.compute.data.Block;
+import org.elasticsearch.compute.data.BooleanBlock;
+import org.elasticsearch.compute.data.BytesRefBlock;
+import org.elasticsearch.compute.data.BytesRefVector;
+import org.elasticsearch.compute.data.DoubleBlock;
+import org.elasticsearch.compute.data.DoubleVector;
+import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.IntBlock;
+import org.elasticsearch.compute.data.IntVector;
+import org.elasticsearch.compute.data.LongBlock;
+import org.elasticsearch.compute.data.LongVector;
+import org.elasticsearch.compute.data.Page;
+import org.elasticsearch.compute.data.TestBlockFactory;
+import org.elasticsearch.compute.operator.AnyOperatorTestCase;
+import org.elasticsearch.compute.operator.CannedSourceOperator;
+import org.elasticsearch.compute.operator.Driver;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.DriverRunner;
+import org.elasticsearch.compute.operator.EvalOperator;
+import org.elasticsearch.compute.operator.Operator;
+import org.elasticsearch.compute.operator.PageConsumerOperator;
+import org.elasticsearch.compute.operator.SequenceLongBlockSourceOperator;
+import org.elasticsearch.compute.operator.SourceOperator;
+import org.elasticsearch.compute.operator.TestResultPageSinkOperator;
+import org.elasticsearch.core.IOUtils;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.mapper.BlockLoader;
+import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
+import org.elasticsearch.index.mapper.IdFieldMapper;
+import org.elasticsearch.index.mapper.KeywordFieldMapper;
+import org.elasticsearch.index.mapper.MappedFieldType;
+import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.MapperServiceTestCase;
+import org.elasticsearch.index.mapper.ParsedDocument;
+import org.elasticsearch.index.mapper.SourceLoader;
+import org.elasticsearch.index.mapper.SourceToParse;
+import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
+import org.elasticsearch.search.lookup.SearchLookup;
+import org.elasticsearch.threadpool.FixedExecutorBuilder;
+import org.elasticsearch.threadpool.TestThreadPool;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.xcontent.XContentBuilder;
+import org.elasticsearch.xcontent.XContentType;
+import org.elasticsearch.xcontent.json.JsonXContent;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.core.util.StringUtils;
+import org.hamcrest.Matcher;
+import org.junit.After;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.stream.LongStream;
+
+import static org.elasticsearch.test.MapMatcher.assertMap;
+import static org.elasticsearch.test.MapMatcher.matchesMap;
+import static org.elasticsearch.xpack.esql.core.type.DataType.IP;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.Matchers.oneOf;
+import static org.hamcrest.Matchers.sameInstance;
+
+/**
+ * These tests are partial duplicates of the tests in ValuesSourceReaderOperatorTests, and focus on testing the behaviour
+ * of the ValuesSourceReaderOperator, but with a few key differences:
+ * <ul>
+ *     <li>Multiple indexes and index mappings are defined and tested</li>
+ *     <li>
+ *         Most primitive types also include a field with prefix 'str_' which is stored and mapped as a string,
+ *         but expected to be extracted and converted directly to the primitive type.
+ *         For example: <code>"str_long": "1"</code> should be read directly into a field named "str_long"
+ *         of type "long" and value 1.
+ *         This tests the ability of the BlockLoader.convert(Block) method to convert a string to a primitive type.
+ *     </li>
+ *     <li>
+ *         Each index has a few additional custom fields that are stored as specific types, but should be converted
+ *         to strings by the <code>BlockLoader.convert(Block)</code> method. These fields are:
+ *         <ul>
+ *             <li>ip: stored as an IP type, but should be converted to a string</li>
+ *             <li>duration: stored as a long type, but should be converted to a string</li>
+ *         </ul>
+ *         One index stores them as IP and long types, and the other as keyword types, so we test the behaviour of the
+ *         'union types' capabilities of the ValuesSourceReaderOperator class.
+ *     </li>
+ * </ul>
+ * Since this test does not have access to the type conversion code in the ESQL module, we have mocks for that behaviour
+ * in the inner classes TestTypeConvertingBlockLoader and TestBlockConverter.
+ */
+@SuppressWarnings("resource")
+public class ValueSourceReaderTypeConversionTests extends AnyOperatorTestCase {
+ private static final String[] PREFIX = new String[] { "a", "b", "c" };
+ private static final Map<String, TestIndexMappingConfig> INDICES = new LinkedHashMap<>();
+ static {
+ addIndex(
+ Map.of(
+ "ip",
+ new TestFieldType<>("ip", IP, d -> "192.169.0." + d % 256, Checks::unionIPsAsStrings),
+ "duration",
+ new TestFieldType<>("duration", DataType.LONG, d -> (long) d, Checks::unionDurationsAsStrings)
+ )
+ );
+ addIndex(
+ Map.of(
+ "ip",
+ new TestFieldType<>("ip", DataType.KEYWORD, d -> "192.169.0." + d % 256, Checks::unionIPsAsStrings),
+ "duration",
+ new TestFieldType<>("duration", DataType.KEYWORD, d -> Integer.toString(d), Checks::unionDurationsAsStrings)
+ )
+ );
+ }
+
+ static void addIndex(Map<String, TestFieldType<?>> fieldTypes) {
+ String indexKey = "index" + (INDICES.size() + 1);
+ INDICES.put(indexKey, new TestIndexMappingConfig(indexKey, INDICES.size(), fieldTypes));
+ }
+
+ private record TestIndexMappingConfig(String indexName, int shardIdx, Map<String, TestFieldType<?>> fieldTypes) {}
+
+ private record TestFieldType<T>(String name, DataType dataType, Function<Integer, T> valueGenerator, CheckResults checkResults) {}
+
+ private final Map<String, Directory> directories = new HashMap<>();
+ private final Map<String, MapperService> mapperServices = new HashMap<>();
+ private final Map<String, IndexReader> readers = new HashMap<>();
+ private static final Map<String, Map<Integer, String>> keyToTags = new HashMap<>();
+
+ @After
+ public void closeIndex() throws IOException {
+ IOUtils.close(readers.values());
+ IOUtils.close(directories.values());
+ }
+
+ private Directory directory(String indexKey) {
+ return directories.computeIfAbsent(indexKey, k -> newDirectory());
+ }
+
+ private MapperService mapperService(String indexKey) {
+ return mapperServices.get(indexKey);
+ }
+
+ private List<ValuesSourceReaderOperator.ShardContext> initShardContexts() {
+ return INDICES.keySet()
+ .stream()
+ .map(index -> new ValuesSourceReaderOperator.ShardContext(reader(index), () -> SourceLoader.FROM_STORED_SOURCE))
+ .toList();
+ }
+
+ private IndexReader reader(String indexKey) {
+ if (readers.get(indexKey) == null) {
+ try {
+ initIndex(indexKey, 100, 10);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return readers.get(indexKey);
+ }
+
+ @Override
+ protected Operator.OperatorFactory simple() {
+ return factory(initShardContexts(), mapperService("index1").fieldType("long"), ElementType.LONG);
+ }
+
+ public static Operator.OperatorFactory factory(
+ List<ValuesSourceReaderOperator.ShardContext> shardContexts,
+ MappedFieldType ft,
+ ElementType elementType
+ ) {
+ return factory(shardContexts, ft.name(), elementType, ft.blockLoader(null));
+ }
+
+ private static Operator.OperatorFactory factory(
+ List<ValuesSourceReaderOperator.ShardContext> shardContexts,
+ String name,
+ ElementType elementType,
+ BlockLoader loader
+ ) {
+ return new ValuesSourceReaderOperator.Factory(List.of(new ValuesSourceReaderOperator.FieldInfo(name, elementType, shardIdx -> {
+ if (shardIdx < 0 || shardIdx >= INDICES.size()) {
+ fail("unexpected shardIdx [" + shardIdx + "]");
+ }
+ return loader;
+ })), shardContexts, 0);
+ }
+
+ protected SourceOperator simpleInput(DriverContext context, int size) {
+ return simpleInput(context, size, commitEvery(size), randomPageSize());
+ }
+
+ private int commitEvery(int numDocs) {
+ return Math.max(1, (int) Math.ceil((double) numDocs / 10));
+ }
+
+ private SourceOperator simpleInput(DriverContext context, int size, int commitEvery, int pageSize) {
+ List<LuceneSourceOperatorTests.MockShardContext> shardContexts = new ArrayList<>();
+ try {
+ for (String indexKey : INDICES.keySet()) {
+ initIndex(indexKey, size, commitEvery);
+ shardContexts.add(new LuceneSourceOperatorTests.MockShardContext(reader(indexKey), INDICES.get(indexKey).shardIdx));
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ var luceneFactory = new LuceneSourceOperator.Factory(
+ shardContexts,
+ ctx -> new MatchAllDocsQuery(),
+ DataPartitioning.SHARD,
+ 1, // randomIntBetween(1, 10),
+ pageSize,
+ LuceneOperator.NO_LIMIT
+ );
+ return luceneFactory.get(context);
+ }
+
+ private void initMapping(String indexKey) throws IOException {
+ TestIndexMappingConfig indexMappingConfig = INDICES.get(indexKey);
+ mapperServices.put(indexKey, new MapperServiceTestCase() {
+ }.createMapperService(MapperServiceTestCase.mapping(b -> {
+ fieldExamples(b, "key", "integer"); // unique key per-index to use for looking up test values to compare to
+ fieldExamples(b, "indexKey", "keyword"); // index name (can be used to choose index-specific test values)
+ fieldExamples(b, "int", "integer");
+ fieldExamples(b, "short", "short");
+ fieldExamples(b, "byte", "byte");
+ fieldExamples(b, "long", "long");
+ fieldExamples(b, "double", "double");
+ fieldExamples(b, "kwd", "keyword");
+ b.startObject("stored_kwd").field("type", "keyword").field("store", true).endObject();
+ b.startObject("mv_stored_kwd").field("type", "keyword").field("store", true).endObject();
+
+ simpleField(b, "missing_text", "text");
+
+ for (Map.Entry<String, TestFieldType<?>> entry : indexMappingConfig.fieldTypes.entrySet()) {
+ String fieldName = entry.getKey();
+ TestFieldType<?> fieldType = entry.getValue();
+ simpleField(b, fieldName, fieldType.dataType.typeName());
+ }
+ })));
+ }
+
+ private void initIndex(String indexKey, int size, int commitEvery) throws IOException {
+ initMapping(indexKey);
+ readers.put(indexKey, initIndex(indexKey, directory(indexKey), size, commitEvery));
+ }
+
+ private IndexReader initIndex(String indexKey, Directory directory, int size, int commitEvery) throws IOException {
+ keyToTags.computeIfAbsent(indexKey, k -> new HashMap<>()).clear();
+ TestIndexMappingConfig indexMappingConfig = INDICES.get(indexKey);
+ try (
+ IndexWriter writer = new IndexWriter(
+ directory,
+ newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+ )
+ ) {
+ for (int d = 0; d < size; d++) {
+ XContentBuilder source = JsonXContent.contentBuilder();
+ source.startObject();
+ source.field("key", d); // documents in this index have a unique key, from which most other values can be derived
+ source.field("indexKey", indexKey); // all documents in this index have the same indexKey
+
+ source.field("long", d);
+ source.field("str_long", Long.toString(d));
+ source.startArray("mv_long");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(-1_000L * d + v);
+ }
+ source.endArray();
+ source.field("source_long", (long) d);
+ source.startArray("mv_source_long");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(-1_000L * d + v);
+ }
+ source.endArray();
+
+ source.field("int", d);
+ source.field("str_int", Integer.toString(d));
+ source.startArray("mv_int");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(1_000 * d + v);
+ }
+ source.endArray();
+ source.field("source_int", d);
+ source.startArray("mv_source_int");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(1_000 * d + v);
+ }
+ source.endArray();
+
+ source.field("short", (short) d);
+ source.field("str_short", Short.toString((short) d));
+ source.startArray("mv_short");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value((short) (2_000 * d + v));
+ }
+ source.endArray();
+ source.field("source_short", (short) d);
+ source.startArray("mv_source_short");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value((short) (2_000 * d + v));
+ }
+ source.endArray();
+
+ source.field("byte", (byte) d);
+ source.field("str_byte", Byte.toString((byte) d));
+ source.startArray("mv_byte");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value((byte) (3_000 * d + v));
+ }
+ source.endArray();
+ source.field("source_byte", (byte) d);
+ source.startArray("mv_source_byte");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value((byte) (3_000 * d + v));
+ }
+ source.endArray();
+
+ source.field("double", d / 123_456d);
+ source.field("str_double", Double.toString(d / 123_456d));
+ source.startArray("mv_double");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(d / 123_456d + v);
+ }
+ source.endArray();
+ source.field("source_double", d / 123_456d);
+ source.startArray("mv_source_double");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(d / 123_456d + v);
+ }
+ source.endArray();
+
+ String tag = keyToTags.get(indexKey).computeIfAbsent(d, k -> "tag-" + randomIntBetween(1, 5));
+ source.field("kwd", tag);
+ source.field("str_kwd", tag);
+ source.startArray("mv_kwd");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(PREFIX[v] + d);
+ }
+ source.endArray();
+ source.field("stored_kwd", Integer.toString(d));
+ source.startArray("mv_stored_kwd");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(PREFIX[v] + d);
+ }
+ source.endArray();
+ source.field("source_kwd", Integer.toString(d));
+ source.startArray("mv_source_kwd");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(PREFIX[v] + d);
+ }
+ source.endArray();
+
+ source.field("text", Integer.toString(d));
+ source.startArray("mv_text");
+ for (int v = 0; v <= d % 3; v++) {
+ source.value(PREFIX[v] + d);
+ }
+ source.endArray();
+
+ for (Map.Entry<String, TestFieldType<?>> entry : indexMappingConfig.fieldTypes.entrySet()) {
+ String fieldName = entry.getKey();
+ TestFieldType<?> fieldType = entry.getValue();
+ source.field(fieldName, fieldType.valueGenerator.apply(d));
+ }
+
+ source.endObject();
+
+ ParsedDocument doc = mapperService(indexKey).documentParser()
+ .parseDocument(
+ new SourceToParse("id" + d, BytesReference.bytes(source), XContentType.JSON),
+ mapperService(indexKey).mappingLookup()
+ );
+ writer.addDocuments(doc.docs());
+
+ if (d % commitEvery == commitEvery - 1) {
+ writer.commit();
+ }
+ }
+ }
+ return DirectoryReader.open(directory);
+ }
+
+ @Override
+ protected Matcher<String> expectedDescriptionOfSimple() {
+ return equalTo("ValuesSourceReaderOperator[fields = [long]]");
+ }
+
+ @Override
+ protected Matcher<String> expectedToStringOfSimple() {
+ return expectedDescriptionOfSimple();
+ }
+
+ public void testLoadAll() {
+ DriverContext driverContext = driverContext();
+ loadSimpleAndAssert(
+ driverContext,
+ CannedSourceOperator.collectPages(simpleInput(driverContext, between(100, 5000))),
+ Block.MvOrdering.SORTED_ASCENDING,
+ Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING
+ );
+ }
+
+ public void testLoadAllInOnePage() {
+ DriverContext driverContext = driverContext();
+ loadSimpleAndAssert(
+ driverContext,
+ List.of(CannedSourceOperator.mergePages(CannedSourceOperator.collectPages(simpleInput(driverContext, between(100, 5000))))),
+ Block.MvOrdering.UNORDERED,
+ Block.MvOrdering.UNORDERED
+ );
+ }
+
+ public void testManySingleDocPages() {
+ String indexKey = "index1";
+ DriverContext driverContext = driverContext();
+ int numDocs = between(10, 100);
+ List<Page> input = CannedSourceOperator.collectPages(simpleInput(driverContext, numDocs, between(1, numDocs), 1));
+ Randomness.shuffle(input);
+ List<ValuesSourceReaderOperator.ShardContext> shardContexts = initShardContexts();
+ List<Operator> operators = new ArrayList<>();
+ Checks checks = new Checks(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING, Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING);
+ FieldCase testCase = new FieldCase(
+ new KeywordFieldMapper.KeywordFieldType("kwd"),
+ ElementType.BYTES_REF,
+ checks::tags,
+ StatusChecks::keywordsFromDocValues
+ );
+ // TODO: Add index2
+ operators.add(
+ new ValuesSourceReaderOperator.Factory(
+ List.of(testCase.info, fieldInfo(mapperService(indexKey).fieldType("key"), ElementType.INT)),
+ shardContexts,
+ 0
+ ).get(driverContext)
+ );
+ List<Page> results = drive(operators, input.iterator(), driverContext);
+ assertThat(results, hasSize(input.size()));
+ for (Page page : results) {
+ assertThat(page.getBlockCount(), equalTo(3));
+ IntVector keys = page.<IntBlock>getBlock(2).asVector();
+ for (int p = 0; p < page.getPositionCount(); p++) {
+ int key = keys.getInt(p);
+ testCase.checkResults.check(page.getBlock(1), p, key, indexKey);
+ }
+ }
+ }
+
+ public void testEmpty() {
+ DriverContext driverContext = driverContext();
+ loadSimpleAndAssert(
+ driverContext,
+ CannedSourceOperator.collectPages(simpleInput(driverContext, 0)),
+ Block.MvOrdering.UNORDERED,
+ Block.MvOrdering.UNORDERED
+ );
+ }
+
+ public void testLoadAllInOnePageShuffled() {
+ DriverContext driverContext = driverContext();
+ Page source = CannedSourceOperator.mergePages(CannedSourceOperator.collectPages(simpleInput(driverContext, between(100, 5000))));
+ List<Integer> shuffleList = new ArrayList<>();
+ IntStream.range(0, source.getPositionCount()).forEach(shuffleList::add);
+ Randomness.shuffle(shuffleList);
+ int[] shuffleArray = shuffleList.stream().mapToInt(Integer::intValue).toArray();
+ Block[] shuffledBlocks = new Block[source.getBlockCount()];
+ for (int b = 0; b < shuffledBlocks.length; b++) {
+ shuffledBlocks[b] = source.getBlock(b).filter(shuffleArray);
+ }
+ source = new Page(shuffledBlocks);
+ loadSimpleAndAssert(driverContext, List.of(source), Block.MvOrdering.UNORDERED, Block.MvOrdering.UNORDERED);
+ }
+
+ private static ValuesSourceReaderOperator.FieldInfo fieldInfo(MappedFieldType ft, ElementType elementType) {
+ return new ValuesSourceReaderOperator.FieldInfo(ft.name(), elementType, shardIdx -> getBlockLoaderFor(shardIdx, ft, null));
+ }
+
+ private static ValuesSourceReaderOperator.FieldInfo fieldInfo(MappedFieldType ft, MappedFieldType ftX, ElementType elementType) {
+ return new ValuesSourceReaderOperator.FieldInfo(ft.name(), elementType, shardIdx -> getBlockLoaderFor(shardIdx, ft, ftX));
+ }
+
+ private ValuesSourceReaderOperator.FieldInfo fieldInfo(String fieldName, ElementType elementType, DataType toType) {
+ return new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, shardIdx -> getBlockLoaderFor(shardIdx, fieldName, toType));
+ }
+
+ private static MappedFieldType.BlockLoaderContext blContext() {
+ return new MappedFieldType.BlockLoaderContext() {
+ @Override
+ public String indexName() {
+ return "test_index";
+ }
+
+ @Override
+ public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
+ return MappedFieldType.FieldExtractPreference.NONE;
+ }
+
+ @Override
+ public SearchLookup lookup() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Set<String> sourcePaths(String name) {
+ return Set.of(name);
+ }
+
+ @Override
+ public String parentField(String field) {
+ return null;
+ }
+
+ @Override
+ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
+ return FieldNamesFieldMapper.FieldNamesFieldType.get(true);
+ }
+ };
+ }
+
+ private void loadSimpleAndAssert(
+ DriverContext driverContext,
+ List input,
+ Block.MvOrdering booleanAndNumericalDocValuesMvOrdering,
+ Block.MvOrdering bytesRefDocValuesMvOrdering
+ ) {
+ List<FieldCase> cases = infoAndChecksForEachType(booleanAndNumericalDocValuesMvOrdering, bytesRefDocValuesMvOrdering);
+ List<ValuesSourceReaderOperator.ShardContext> shardContexts = initShardContexts();
+ List<Operator> operators = new ArrayList<>();
+ operators.add(
+ new ValuesSourceReaderOperator.Factory(
+ List.of(
+ fieldInfo(mapperService("index1").fieldType("key"), ElementType.INT),
+ fieldInfo(mapperService("index1").fieldType("indexKey"), ElementType.BYTES_REF)
+ ),
+ shardContexts,
+ 0
+ ).get(driverContext)
+ );
+ List<FieldCase> tests = new ArrayList<>();
+ while (cases.isEmpty() == false) {
+ List<FieldCase> b = randomNonEmptySubsetOf(cases);
+ cases.removeAll(b);
+ tests.addAll(b);
+ operators.add(
+ new ValuesSourceReaderOperator.Factory(b.stream().map(i -> i.info).toList(), shardContexts, 0).get(driverContext)
+ );
+ }
+ List<Page> results = drive(operators, input.iterator(), driverContext);
+ assertThat(results, hasSize(input.size()));
+ for (Page page : results) {
+ assertThat(page.getBlockCount(), equalTo(tests.size() + 3 /* one for doc, one for keys and one for indexKey */));
+ IntVector keys = page.<IntBlock>getBlock(1).asVector();
+ BytesRefVector indexKeys = page.<BytesRefBlock>getBlock(2).asVector();
+ for (int p = 0; p < page.getPositionCount(); p++) {
+ int key = keys.getInt(p);
+ String indexKey = indexKeys.getBytesRef(p, new BytesRef()).utf8ToString();
+ for (int i = 0; i < tests.size(); i++) {
+ try {
+ tests.get(i).checkResults.check(page.getBlock(3 + i), p, key, indexKey);
+ } catch (AssertionError e) {
+ throw new AssertionError("error checking " + tests.get(i).info.name() + "[" + p + "]: " + e.getMessage(), e);
+ }
+ }
+ }
+ }
+ for (Operator op : operators) {
+ assertThat(((ValuesSourceReaderOperator) op).status().pagesProcessed(), equalTo(input.size()));
+ }
+ assertDriverContext(driverContext);
+ }
+
+ interface CheckResults {
+ void check(Block block, int position, int key, String indexKey);
+ }
+
+ interface CheckReaders {
+ void check(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readersBuilt);
+ }
+
+ interface CheckReadersWithName {
+ void check(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readersBuilt);
+ }
+
+ record FieldCase(ValuesSourceReaderOperator.FieldInfo info, CheckResults checkResults, CheckReadersWithName checkReaders) {
+ FieldCase(MappedFieldType ft, ElementType elementType, CheckResults checkResults, CheckReadersWithName checkReaders) {
+ this(fieldInfo(ft, elementType), checkResults, checkReaders);
+ }
+
+ FieldCase(
+ MappedFieldType ft,
+ MappedFieldType ftX,
+ ElementType elementType,
+ CheckResults checkResults,
+ CheckReadersWithName checkReaders
+ ) {
+ this(fieldInfo(ft, ftX, elementType), checkResults, checkReaders);
+ }
+
+ FieldCase(MappedFieldType ft, ElementType elementType, CheckResults checkResults, CheckReaders checkReaders) {
+ this(
+ ft,
+ elementType,
+ checkResults,
+ (name, forcedRowByRow, pageCount, segmentCount, readersBuilt) -> checkReaders.check(
+ forcedRowByRow,
+ pageCount,
+ segmentCount,
+ readersBuilt
+ )
+ );
+ }
+ }
+
+ /**
+ * Asserts that {@link ValuesSourceReaderOperator#status} claims that only
+ * the expected readers are built after loading singleton pages.
+ */
+ public void testLoadAllStatus() {
+ testLoadAllStatus(false);
+ }
+
+ /**
+ * Asserts that {@link ValuesSourceReaderOperator#status} claims that only
+ * the expected readers are built after loading non-singleton pages.
+ */
+ public void testLoadAllStatusAllInOnePage() {
+ testLoadAllStatus(true);
+ }
+
+ private void testLoadAllStatus(boolean allInOnePage) {
+ DriverContext driverContext = driverContext();
+ int numDocs = between(100, 5000);
+ List<Page> input = CannedSourceOperator.collectPages(simpleInput(driverContext, numDocs, commitEvery(numDocs), numDocs));
+ assertThat(input, hasSize(20));
+ List<ValuesSourceReaderOperator.ShardContext> shardContexts = initShardContexts();
+ int totalSize = 0;
+ for (var shardContext : shardContexts) {
+ assertThat(shardContext.reader().leaves(), hasSize(10));
+ totalSize += shardContext.reader().leaves().size();
+ }
+ // Build one operator for each field, so we get a unique map to assert on
+ List<FieldCase> cases = infoAndChecksForEachType(
+ Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING,
+ Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING
+ );
+ List<Operator> operators = cases.stream()
+ .map(i -> new ValuesSourceReaderOperator.Factory(List.of(i.info), shardContexts, 0).get(driverContext))
+ .toList();
+ if (allInOnePage) {
+ input = List.of(CannedSourceOperator.mergePages(input));
+ }
+ drive(operators, input.iterator(), driverContext);
+ for (int i = 0; i < cases.size(); i++) {
+ ValuesSourceReaderOperator.Status status = (ValuesSourceReaderOperator.Status) operators.get(i).status();
+ assertThat(status.pagesProcessed(), equalTo(input.size()));
+ FieldCase fc = cases.get(i);
+ fc.checkReaders.check(fc.info.name(), allInOnePage, input.size(), totalSize, status.readersBuilt());
+ }
+ }
+
+ private List<FieldCase> infoAndChecksForEachType(
+ Block.MvOrdering booleanAndNumericalDocValuesMvOrdering,
+ Block.MvOrdering bytesRefDocValuesMvOrdering
+ ) {
+ MapperService mapperService = mapperService("index1"); // almost fields have identical mapper service
+ Checks checks = new Checks(booleanAndNumericalDocValuesMvOrdering, bytesRefDocValuesMvOrdering);
+ List<FieldCase> r = new ArrayList<>();
+ r.add(new FieldCase(mapperService.fieldType(IdFieldMapper.NAME), ElementType.BYTES_REF, checks::ids, StatusChecks::id));
+ r.add(new FieldCase(TsidExtractingIdFieldMapper.INSTANCE.fieldType(), ElementType.BYTES_REF, checks::ids, StatusChecks::id));
+ r.add(new FieldCase(mapperService.fieldType("long"), ElementType.LONG, checks::longs, StatusChecks::longsFromDocValues));
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("str_long"),
+ mapperService.fieldType("long"),
+ ElementType.LONG,
+ checks::longs,
+ StatusChecks::strFromDocValues
+ )
+ );
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("mv_long"),
+ ElementType.LONG,
+ checks::mvLongsFromDocValues,
+ StatusChecks::mvLongsFromDocValues
+ )
+ );
+ r.add(new FieldCase(mapperService.fieldType("missing_long"), ElementType.LONG, checks::constantNulls, StatusChecks::constantNulls));
+ r.add(new FieldCase(mapperService.fieldType("source_long"), ElementType.LONG, checks::longs, StatusChecks::longsFromSource));
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("mv_source_long"),
+ ElementType.LONG,
+ checks::mvLongsUnordered,
+ StatusChecks::mvLongsFromSource
+ )
+ );
+ r.add(new FieldCase(mapperService.fieldType("int"), ElementType.INT, checks::ints, StatusChecks::intsFromDocValues));
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("str_int"),
+ mapperService.fieldType("int"),
+ ElementType.INT,
+ checks::ints,
+ StatusChecks::strFromDocValues
+ )
+ );
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("mv_int"),
+ ElementType.INT,
+ checks::mvIntsFromDocValues,
+ StatusChecks::mvIntsFromDocValues
+ )
+ );
+ r.add(new FieldCase(mapperService.fieldType("missing_int"), ElementType.INT, checks::constantNulls, StatusChecks::constantNulls));
+ r.add(new FieldCase(mapperService.fieldType("source_int"), ElementType.INT, checks::ints, StatusChecks::intsFromSource));
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("mv_source_int"),
+ ElementType.INT,
+ checks::mvIntsUnordered,
+ StatusChecks::mvIntsFromSource
+ )
+ );
+ r.add(new FieldCase(mapperService.fieldType("short"), ElementType.INT, checks::shorts, StatusChecks::shortsFromDocValues));
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("str_short"),
+ mapperService.fieldType("short"),
+ ElementType.INT,
+ checks::shorts,
+ StatusChecks::strFromDocValues
+ )
+ );
+ r.add(new FieldCase(mapperService.fieldType("mv_short"), ElementType.INT, checks::mvShorts, StatusChecks::mvShortsFromDocValues));
+ r.add(new FieldCase(mapperService.fieldType("missing_short"), ElementType.INT, checks::constantNulls, StatusChecks::constantNulls));
+ r.add(new FieldCase(mapperService.fieldType("byte"), ElementType.INT, checks::bytes, StatusChecks::bytesFromDocValues));
+ // r.add(new FieldCase(mapperService.fieldType("str_byte"), ElementType.INT, checks::bytes, StatusChecks::bytesFromDocValues));
+ r.add(new FieldCase(mapperService.fieldType("mv_byte"), ElementType.INT, checks::mvBytes, StatusChecks::mvBytesFromDocValues));
+ r.add(new FieldCase(mapperService.fieldType("missing_byte"), ElementType.INT, checks::constantNulls, StatusChecks::constantNulls));
+ r.add(new FieldCase(mapperService.fieldType("double"), ElementType.DOUBLE, checks::doubles, StatusChecks::doublesFromDocValues));
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("str_double"),
+ mapperService.fieldType("double"),
+ ElementType.DOUBLE,
+ checks::doubles,
+ StatusChecks::strFromDocValues
+ )
+ );
+ r.add(
+ new FieldCase(mapperService.fieldType("mv_double"), ElementType.DOUBLE, checks::mvDoubles, StatusChecks::mvDoublesFromDocValues)
+ );
+ r.add(
+ new FieldCase(mapperService.fieldType("missing_double"), ElementType.DOUBLE, checks::constantNulls, StatusChecks::constantNulls)
+ );
+ r.add(new FieldCase(mapperService.fieldType("kwd"), ElementType.BYTES_REF, checks::tags, StatusChecks::keywordsFromDocValues));
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("mv_kwd"),
+ ElementType.BYTES_REF,
+ checks::mvStringsFromDocValues,
+ StatusChecks::mvKeywordsFromDocValues
+ )
+ );
+ r.add(
+ new FieldCase(mapperService.fieldType("missing_kwd"), ElementType.BYTES_REF, checks::constantNulls, StatusChecks::constantNulls)
+ );
+ r.add(new FieldCase(storedKeywordField("stored_kwd"), ElementType.BYTES_REF, checks::strings, StatusChecks::keywordsFromStored));
+ r.add(
+ new FieldCase(
+ storedKeywordField("mv_stored_kwd"),
+ ElementType.BYTES_REF,
+ checks::mvStringsUnordered,
+ StatusChecks::mvKeywordsFromStored
+ )
+ );
+ r.add(
+ new FieldCase(mapperService.fieldType("source_kwd"), ElementType.BYTES_REF, checks::strings, StatusChecks::keywordsFromSource)
+ );
+ r.add(
+ new FieldCase(
+ mapperService.fieldType("mv_source_kwd"),
+ ElementType.BYTES_REF,
+ checks::mvStringsUnordered,
+ StatusChecks::mvKeywordsFromSource
+ )
+ );
+ r.add(
+ new FieldCase(
+ new ValuesSourceReaderOperator.FieldInfo(
+ "constant_bytes",
+ ElementType.BYTES_REF,
+ shardIdx -> BlockLoader.constantBytes(new BytesRef("foo"))
+ ),
+ checks::constantBytes,
+ StatusChecks::constantBytes
+ )
+ );
+ r.add(
+ new FieldCase(
+ new ValuesSourceReaderOperator.FieldInfo("null", ElementType.NULL, shardIdx -> BlockLoader.CONSTANT_NULLS),
+ checks::constantNulls,
+ StatusChecks::constantNulls
+ )
+ );
+
+ // We only care about the field name at this point, so we can use any index mapper here
+ TestIndexMappingConfig indexMappingConfig = INDICES.get("index1");
+ for (TestFieldType<?> fieldType : indexMappingConfig.fieldTypes.values()) {
+ r.add(
+ new FieldCase(
+ fieldInfo(fieldType.name, ElementType.BYTES_REF, DataType.KEYWORD),
+ fieldType.checkResults,
+ StatusChecks::unionFromDocValues
+ )
+ );
+ }
+ Collections.shuffle(r, random());
+ return r;
+ }
+
+ record Checks(Block.MvOrdering booleanAndNumericalDocValuesMvOrdering, Block.MvOrdering bytesRefDocValuesMvOrdering) {
+ void longs(Block block, int position, int key, String indexKey) {
+ LongVector longs = ((LongBlock) block).asVector();
+ assertThat(longs.getLong(position), equalTo((long) key));
+ }
+
+ void ints(Block block, int position, int key, String indexKey) {
+ IntVector ints = ((IntBlock) block).asVector();
+ assertThat(ints.getInt(position), equalTo(key));
+ }
+
+ void shorts(Block block, int position, int key, String indexKey) {
+ IntVector ints = ((IntBlock) block).asVector();
+ assertThat(ints.getInt(position), equalTo((int) (short) key));
+ }
+
+ void bytes(Block block, int position, int key, String indexKey) {
+ IntVector ints = ((IntBlock) block).asVector();
+ assertThat(ints.getInt(position), equalTo((int) (byte) key));
+ }
+
+ void doubles(Block block, int position, int key, String indexKey) {
+ DoubleVector doubles = ((DoubleBlock) block).asVector();
+ assertThat(doubles.getDouble(position), equalTo(key / 123_456d));
+ }
+
+ void strings(Block block, int position, int key, String indexKey) {
+ BytesRefVector keywords = ((BytesRefBlock) block).asVector();
+ assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo(Integer.toString(key)));
+ }
+
+ static void unionIPsAsStrings(Block block, int position, int key, String indexKey) {
+ BytesRefVector keywords = ((BytesRefBlock) block).asVector();
+ BytesRef bytesRef = keywords.getBytesRef(position, new BytesRef());
+ TestIndexMappingConfig mappingConfig = INDICES.get(indexKey);
+ TestFieldType> fieldType = mappingConfig.fieldTypes.get("ip");
+ String expected = fieldType.valueGenerator.apply(key).toString();
+ // Conversion should already be done in FieldInfo!
+ // BytesRef found = (fieldType.dataType.typeName().equals("ip")) ? new BytesRef(DocValueFormat.IP.format(bytesRef)) : bytesRef;
+ assertThat(bytesRef.utf8ToString(), equalTo(expected));
+ }
+
+ static void unionDurationsAsStrings(Block block, int position, int key, String indexKey) {
+ BytesRefVector keywords = ((BytesRefBlock) block).asVector();
+ BytesRef bytesRef = keywords.getBytesRef(position, new BytesRef());
+ TestIndexMappingConfig mappingConfig = INDICES.get(indexKey);
+ TestFieldType> fieldType = mappingConfig.fieldTypes.get("duration");
+ String expected = fieldType.valueGenerator.apply(key).toString();
+ assertThat(bytesRef.utf8ToString(), equalTo(expected));
+ }
+
+ void tags(Block block, int position, int key, String indexKey) {
+ BytesRefVector keywords = ((BytesRefBlock) block).asVector();
+ Object[] validTags = INDICES.keySet().stream().map(keyToTags::get).map(t -> t.get(key)).toArray();
+ assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), oneOf(validTags));
+ }
+
+ void ids(Block block, int position, int key, String indexKey) {
+ BytesRefVector ids = ((BytesRefBlock) block).asVector();
+ assertThat(ids.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo("id" + key));
+ }
+
+ void constantBytes(Block block, int position, int key, String indexKey) {
+ BytesRefVector keywords = ((BytesRefBlock) block).asVector();
+ assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo("foo"));
+ }
+
+ void constantNulls(Block block, int position, int key, String indexKey) {
+ assertTrue(block.areAllValuesNull());
+ assertTrue(block.isNull(position));
+ }
+
+ void mvLongsFromDocValues(Block block, int position, int key, String indexKey) {
+ mvLongs(block, position, key, booleanAndNumericalDocValuesMvOrdering);
+ }
+
+ void mvLongsUnordered(Block block, int position, int key, String indexKey) {
+ mvLongs(block, position, key, Block.MvOrdering.UNORDERED);
+ }
+
+ private void mvLongs(Block block, int position, int key, Block.MvOrdering expectedMv) {
+ LongBlock longs = (LongBlock) block;
+ assertThat(longs.getValueCount(position), equalTo(key % 3 + 1));
+ int offset = longs.getFirstValueIndex(position);
+ for (int v = 0; v <= key % 3; v++) {
+ assertThat(longs.getLong(offset + v), equalTo(-1_000L * key + v));
+ }
+ if (key % 3 > 0) {
+ assertThat(longs.mvOrdering(), equalTo(expectedMv));
+ }
+ }
+
+ void mvIntsFromDocValues(Block block, int position, int key, String indexKey) {
+ mvInts(block, position, key, booleanAndNumericalDocValuesMvOrdering);
+ }
+
+ void mvIntsUnordered(Block block, int position, int key, String indexKey) {
+ mvInts(block, position, key, Block.MvOrdering.UNORDERED);
+ }
+
+ private void mvInts(Block block, int position, int key, Block.MvOrdering expectedMv) {
+ IntBlock ints = (IntBlock) block;
+ assertThat(ints.getValueCount(position), equalTo(key % 3 + 1));
+ int offset = ints.getFirstValueIndex(position);
+ for (int v = 0; v <= key % 3; v++) {
+ assertThat(ints.getInt(offset + v), equalTo(1_000 * key + v));
+ }
+ if (key % 3 > 0) {
+ assertThat(ints.mvOrdering(), equalTo(expectedMv));
+ }
+ }
+
+ void mvShorts(Block block, int position, int key, String indexKey) {
+ IntBlock ints = (IntBlock) block;
+ assertThat(ints.getValueCount(position), equalTo(key % 3 + 1));
+ int offset = ints.getFirstValueIndex(position);
+ for (int v = 0; v <= key % 3; v++) {
+ assertThat(ints.getInt(offset + v), equalTo((int) (short) (2_000 * key + v)));
+ }
+ if (key % 3 > 0) {
+ assertThat(ints.mvOrdering(), equalTo(booleanAndNumericalDocValuesMvOrdering));
+ }
+ }
+
+ void mvBytes(Block block, int position, int key, String indexKey) {
+ IntBlock ints = (IntBlock) block;
+ assertThat(ints.getValueCount(position), equalTo(key % 3 + 1));
+ int offset = ints.getFirstValueIndex(position);
+ for (int v = 0; v <= key % 3; v++) {
+ assertThat(ints.getInt(offset + v), equalTo((int) (byte) (3_000 * key + v)));
+ }
+ if (key % 3 > 0) {
+ assertThat(ints.mvOrdering(), equalTo(booleanAndNumericalDocValuesMvOrdering));
+ }
+ }
+
+ void mvDoubles(Block block, int position, int key, String indexKey) {
+ DoubleBlock doubles = (DoubleBlock) block;
+ int offset = doubles.getFirstValueIndex(position);
+ for (int v = 0; v <= key % 3; v++) {
+ assertThat(doubles.getDouble(offset + v), equalTo(key / 123_456d + v));
+ }
+ if (key % 3 > 0) {
+ assertThat(doubles.mvOrdering(), equalTo(booleanAndNumericalDocValuesMvOrdering));
+ }
+ }
+
+ void mvStringsFromDocValues(Block block, int position, int key, String indexKey) {
+ mvStrings(block, position, key, bytesRefDocValuesMvOrdering);
+ }
+
+ void mvStringsUnordered(Block block, int position, int key, String indexKey) {
+ mvStrings(block, position, key, Block.MvOrdering.UNORDERED);
+ }
+
+ void mvStrings(Block block, int position, int key, Block.MvOrdering expectedMv) {
+ BytesRefBlock text = (BytesRefBlock) block;
+ assertThat(text.getValueCount(position), equalTo(key % 3 + 1));
+ int offset = text.getFirstValueIndex(position);
+ for (int v = 0; v <= key % 3; v++) {
+ assertThat(text.getBytesRef(offset + v, new BytesRef()).utf8ToString(), equalTo(PREFIX[v] + key));
+ }
+ if (key % 3 > 0) {
+ assertThat(text.mvOrdering(), equalTo(expectedMv));
+ }
+ }
+ }
+
+ static class StatusChecks {
+
+ static void strFromDocValues(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ docValues(name, "Ordinals", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void longsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ docValues("long", "Longs", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void longsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ source("source_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void intsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ docValues("int", "Ints", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void intsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ source("source_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void shortsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ docValues("short", "Ints", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void bytesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ docValues("byte", "Ints", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void doublesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ docValues("double", "Doubles", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void keywordsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ docValues("kwd", "Ordinals", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void keywordsFromStored(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ stored("stored_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void keywordsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ source("source_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvLongsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ mvDocValues("mv_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvLongsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ source("mv_source_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvIntsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ mvDocValues("mv_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvIntsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ source("mv_source_int", "Ints", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvShortsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ mvDocValues("mv_short", "Ints", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvBytesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ mvDocValues("mv_byte", "Ints", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvDoublesFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ mvDocValues("mv_double", "Doubles", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvKeywordsFromDocValues(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ mvDocValues("mv_kwd", "Ordinals", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvKeywordsFromStored(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ stored("mv_stored_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void mvKeywordsFromSource(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ source("mv_source_kwd", "Bytes", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ static void unionFromDocValues(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ // TODO: develop a working check for this
+ // docValues(name, "Ordinals", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ private static void docValues(
+ String name,
+ String type,
+ boolean forcedRowByRow,
+ int pageCount,
+ int segmentCount,
+ Map<?, ?> readers
+ ) {
+ if (forcedRowByRow) {
+ assertMap(
+ "Expected segment count in " + readers + "\n",
+ readers,
+ matchesMap().entry(name + ":row_stride:BlockDocValuesReader.Singleton" + type, lessThanOrEqualTo(segmentCount))
+ );
+ } else {
+ assertMap(
+ "Expected segment count in " + readers + "\n",
+ readers,
+ matchesMap().entry(name + ":column_at_a_time:BlockDocValuesReader.Singleton" + type, lessThanOrEqualTo(pageCount))
+ );
+ }
+ }
+
+ private static void mvDocValues(
+ String name,
+ String type,
+ boolean forcedRowByRow,
+ int pageCount,
+ int segmentCount,
+ Map<?, ?> readers
+ ) {
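+ // Some segments (or pages, for column-at-a-time reads) contain only single-valued docs and are
+ // read by the Singleton reader; remove those entries first so the remaining count must be
+ // covered exactly by the multi-value reader.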
+ if (forcedRowByRow) {
+ Integer singletons = (Integer) readers.remove(name + ":row_stride:BlockDocValuesReader.Singleton" + type);
+ if (singletons != null) {
+ segmentCount -= singletons;
+ }
+ assertMap(readers, matchesMap().entry(name + ":row_stride:BlockDocValuesReader." + type, segmentCount));
+ } else {
+ Integer singletons = (Integer) readers.remove(name + ":column_at_a_time:BlockDocValuesReader.Singleton" + type);
+ if (singletons != null) {
+ pageCount -= singletons;
+ }
+ assertMap(
+ readers,
+ matchesMap().entry(name + ":column_at_a_time:BlockDocValuesReader." + type, lessThanOrEqualTo(pageCount))
+ );
+ }
+ }
+
+ static void id(boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ stored("_id", "Id", forcedRowByRow, pageCount, segmentCount, readers);
+ }
+
+ private static void source(String name, String type, boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ Matcher<Integer> count;
+ if (forcedRowByRow) {
+ count = equalTo(segmentCount);
+ } else {
+ count = lessThanOrEqualTo(pageCount);
+ Integer columnAttempts = (Integer) readers.remove(name + ":column_at_a_time:null");
+ assertThat(columnAttempts, not(nullValue()));
+ }
+
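+ // Loading from _source goes through the stored-fields reader, which may or may not be flagged
+ // as sequential depending on how the docs were visited; tally both variants before asserting.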
+ Integer sequentialCount = (Integer) readers.remove("stored_fields[requires_source:true, fields:0, sequential: true]");
+ Integer nonSequentialCount = (Integer) readers.remove("stored_fields[requires_source:true, fields:0, sequential: false]");
+ int totalReaders = (sequentialCount == null ? 0 : sequentialCount) + (nonSequentialCount == null ? 0 : nonSequentialCount);
+ assertThat(totalReaders, count);
+
+ assertMap(readers, matchesMap().entry(name + ":row_stride:BlockSourceReader." + type, count));
+ }
+
+ private static void stored(String name, String type, boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ Matcher<Integer> count;
+ if (forcedRowByRow) {
+ count = equalTo(segmentCount);
+ } else {
+ count = lessThanOrEqualTo(pageCount);
+ Integer columnAttempts = (Integer) readers.remove(name + ":column_at_a_time:null");
+ assertThat(columnAttempts, not(nullValue()));
+ }
+
+ Integer sequentialCount = (Integer) readers.remove("stored_fields[requires_source:false, fields:1, sequential: true]");
+ Integer nonSequentialCount = (Integer) readers.remove("stored_fields[requires_source:false, fields:1, sequential: false]");
+ int totalReaders = (sequentialCount == null ? 0 : sequentialCount) + (nonSequentialCount == null ? 0 : nonSequentialCount);
+ assertThat(totalReaders, count);
+
+ assertMap(readers, matchesMap().entry(name + ":row_stride:BlockStoredFieldsReader." + type, count));
+ }
+
+ static void constantBytes(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ if (forcedRowByRow) {
+ assertMap(readers, matchesMap().entry(name + ":row_stride:constant[[66 6f 6f]]", segmentCount));
+ } else {
+ assertMap(readers, matchesMap().entry(name + ":column_at_a_time:constant[[66 6f 6f]]", lessThanOrEqualTo(pageCount)));
+ }
+ }
+
+ static void constantNulls(String name, boolean forcedRowByRow, int pageCount, int segmentCount, Map<?, ?> readers) {
+ if (forcedRowByRow) {
+ assertMap(readers, matchesMap().entry(name + ":row_stride:constant_nulls", segmentCount));
+ } else {
+ assertMap(readers, matchesMap().entry(name + ":column_at_a_time:constant_nulls", lessThanOrEqualTo(pageCount)));
+ }
+ }
+ }
+
+ public void testWithNulls() throws IOException {
+ String indexKey = "index1";
+ mapperServices.put(indexKey, new MapperServiceTestCase() {
+ }.createMapperService(MapperServiceTestCase.mapping(b -> {
+ fieldExamples(b, "i", "integer");
+ fieldExamples(b, "j", "long");
+ fieldExamples(b, "d", "double");
+ })));
+ MappedFieldType intFt = mapperService(indexKey).fieldType("i");
+ MappedFieldType longFt = mapperService(indexKey).fieldType("j");
+ MappedFieldType doubleFt = mapperService(indexKey).fieldType("d");
+ MappedFieldType kwFt = new KeywordFieldMapper.KeywordFieldType("kw");
+
+ NumericDocValuesField intField = new NumericDocValuesField(intFt.name(), 0);
+ NumericDocValuesField longField = new NumericDocValuesField(longFt.name(), 0);
+ NumericDocValuesField doubleField = new DoubleDocValuesField(doubleFt.name(), 0);
+ final int numDocs = between(100, 5000);
+ try (RandomIndexWriter w = new RandomIndexWriter(random(), directory(indexKey))) {
+ Document doc = new Document();
+ for (int i = 0; i < numDocs; i++) {
+ doc.clear();
+ intField.setLongValue(i);
+ doc.add(intField);
+ if (i % 100 != 0) { // Skip these fields on every 100th document
+ longField.setLongValue(i);
+ doc.add(longField);
+ doubleField.setDoubleValue(i);
+ doc.add(doubleField);
+ doc.add(new SortedDocValuesField(kwFt.name(), new BytesRef("kw=" + i)));
+ }
+ w.addDocument(doc);
+ }
+ w.commit();
+ readers.put(indexKey, w.getReader());
+ }
+ LuceneSourceOperatorTests.MockShardContext shardContext = new LuceneSourceOperatorTests.MockShardContext(reader(indexKey), 0);
+ DriverContext driverContext = driverContext();
+ var luceneFactory = new LuceneSourceOperator.Factory(
+ List.of(shardContext),
+ ctx -> new MatchAllDocsQuery(),
+ randomFrom(DataPartitioning.values()),
+ randomIntBetween(1, 10),
+ randomPageSize(),
+ LuceneOperator.NO_LIMIT
+ );
+ var vsShardContext = new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE);
+ try (
+ Driver driver = new Driver(
+ driverContext,
+ luceneFactory.get(driverContext),
+ List.of(
+ factory(List.of(vsShardContext), intFt, ElementType.INT).get(driverContext),
+ factory(List.of(vsShardContext), longFt, ElementType.LONG).get(driverContext),
+ factory(List.of(vsShardContext), doubleFt, ElementType.DOUBLE).get(driverContext),
+ factory(List.of(vsShardContext), kwFt, ElementType.BYTES_REF).get(driverContext)
+ ),
+ new PageConsumerOperator(page -> {
+ try {
+ logger.debug("New page: {}", page);
+ IntBlock intValuesBlock = page.getBlock(1);
+ LongBlock longValuesBlock = page.getBlock(2);
+ DoubleBlock doubleValuesBlock = page.getBlock(3);
+ BytesRefBlock keywordValuesBlock = page.getBlock(4);
+
+ for (int i = 0; i < page.getPositionCount(); i++) {
+ assertFalse(intValuesBlock.isNull(i));
+ long j = intValuesBlock.getInt(i);
+ // Every 100th document was indexed without these fields, so they read back as null
+ boolean fieldIsEmpty = j % 100 == 0;
+ assertEquals(fieldIsEmpty, longValuesBlock.isNull(i));
+ assertEquals(fieldIsEmpty, doubleValuesBlock.isNull(i));
+ assertEquals(fieldIsEmpty, keywordValuesBlock.isNull(i));
+ }
+ } finally {
+ page.releaseBlocks();
+ }
+ }),
+ () -> {}
+ )
+ ) {
+ runDriver(driver);
+ }
+ assertDriverContext(driverContext);
+ }
+
+ private XContentBuilder fieldExamples(XContentBuilder builder, String name, String type) throws IOException {
+ simpleField(builder, name, type);
+ simpleField(builder, "str_" + name, "keyword");
+ simpleField(builder, "mv_" + name, type);
+ simpleField(builder, "missing_" + name, type);
+ sourceField(builder, "source_" + name, type);
+ return sourceField(builder, "mv_source_" + name, type);
+ }
+
+ private XContentBuilder simpleField(XContentBuilder builder, String name, String type) throws IOException {
+ return builder.startObject(name).field("type", type).endObject();
+ }
+
+ private XContentBuilder sourceField(XContentBuilder builder, String name, String type) throws IOException {
+ return builder.startObject(name).field("type", type).field("store", false).field("doc_values", false).endObject();
+ }
+
+ private KeywordFieldMapper.KeywordFieldType storedKeywordField(String name) {
+ FieldType ft = new FieldType(KeywordFieldMapper.Defaults.FIELD_TYPE);
+ ft.setDocValuesType(DocValuesType.NONE);
+ ft.setStored(true);
+ ft.freeze();
+ return new KeywordFieldMapper.KeywordFieldType(
+ name,
+ ft,
+ Lucene.KEYWORD_ANALYZER,
+ Lucene.KEYWORD_ANALYZER,
+ Lucene.KEYWORD_ANALYZER,
+ new KeywordFieldMapper.Builder(name, IndexVersion.current()).docValues(false),
+ true // TODO randomize - load from stored keyword fields if stored even in synthetic source
+ );
+ }
+
+ @AwaitsFix(bugUrl = "Get working for multiple indices")
+ public void testNullsShared() {
+ DriverContext driverContext = driverContext();
+ List<ValuesSourceReaderOperator.ShardContext> shardContexts = initShardContexts();
+ int[] pages = new int[] { 0 };
+ try (
+ Driver d = new Driver(
+ driverContext,
+ simpleInput(driverContext, 10),
+ List.of(
+ new ValuesSourceReaderOperator.Factory(
+ List.of(
+ new ValuesSourceReaderOperator.FieldInfo("null1", ElementType.NULL, shardIdx -> BlockLoader.CONSTANT_NULLS),
+ new ValuesSourceReaderOperator.FieldInfo("null2", ElementType.NULL, shardIdx -> BlockLoader.CONSTANT_NULLS)
+ ),
+ shardContexts,
+ 0
+ ).get(driverContext)
+ ),
+ new PageConsumerOperator(page -> {
+ try {
+ assertThat(page.getBlockCount(), equalTo(3));
+ assertThat(page.getBlock(1).areAllValuesNull(), equalTo(true));
+ assertThat(page.getBlock(2).areAllValuesNull(), equalTo(true));
+ assertThat(page.getBlock(1), sameInstance(page.getBlock(2)));
+ pages[0]++;
+ } finally {
+ page.releaseBlocks();
+ }
+ }),
+ () -> {}
+ )
+ ) {
+ runDriver(d);
+ }
+ assertThat(pages[0], greaterThan(0));
+ assertDriverContext(driverContext);
+ }
+
+ public void testDescriptionOfMany() throws IOException {
+ String indexKey = "index1";
+ initIndex(indexKey, 1, 1);
+ Block.MvOrdering ordering = randomFrom(Block.MvOrdering.values());
+ List<FieldCase> cases = infoAndChecksForEachType(ordering, ordering);
+
+ ValuesSourceReaderOperator.Factory factory = new ValuesSourceReaderOperator.Factory(
+ cases.stream().map(c -> c.info).toList(),
+ List.of(new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE)),
+ 0
+ );
+ assertThat(factory.describe(), equalTo("ValuesSourceReaderOperator[fields = [" + cases.size() + " fields]]"));
+ try (Operator op = factory.get(driverContext())) {
+ assertThat(op.toString(), equalTo("ValuesSourceReaderOperator[fields = [" + cases.size() + " fields]]"));
+ }
+ }
+
+ public void testManyShards() throws IOException {
+ String indexKey = "index1";
+ initMapping(indexKey);
+ int shardCount = between(2, 10);
+ int size = between(100, 1000);
+ Directory[] dirs = new Directory[shardCount];
+ IndexReader[] readers = new IndexReader[shardCount];
+ Closeable[] closeMe = new Closeable[shardCount * 2];
+ Set<Integer> seenShards = new TreeSet<>();
+ Map<Integer, Integer> keyCounts = new TreeMap<>();
+ try {
+ for (int d = 0; d < dirs.length; d++) {
+ closeMe[d * 2 + 1] = dirs[d] = newDirectory();
+ closeMe[d * 2] = readers[d] = initIndex(indexKey, dirs[d], size, between(10, size * 2));
+ }
+ List<LuceneSourceOperatorTests.MockShardContext> contexts = new ArrayList<>();
+ List<ValuesSourceReaderOperator.ShardContext> readerShardContexts = new ArrayList<>();
+ for (int s = 0; s < shardCount; s++) {
+ contexts.add(new LuceneSourceOperatorTests.MockShardContext(readers[s], s));
+ readerShardContexts.add(new ValuesSourceReaderOperator.ShardContext(readers[s], () -> SourceLoader.FROM_STORED_SOURCE));
+ }
+ var luceneFactory = new LuceneSourceOperator.Factory(
+ contexts,
+ ctx -> new MatchAllDocsQuery(),
+ DataPartitioning.SHARD,
+ randomIntBetween(1, 10),
+ 1000,
+ LuceneOperator.NO_LIMIT
+ );
+ // TODO add index2
+ MappedFieldType ft = mapperService(indexKey).fieldType("key");
+ var readerFactory = new ValuesSourceReaderOperator.Factory(
+ List.of(new ValuesSourceReaderOperator.FieldInfo("key", ElementType.INT, shardIdx -> {
+ seenShards.add(shardIdx);
+ return ft.blockLoader(blContext());
+ })),
+ readerShardContexts,
+ 0
+ );
+ DriverContext driverContext = driverContext();
+ List<Page> results = drive(
+ readerFactory.get(driverContext),
+ CannedSourceOperator.collectPages(luceneFactory.get(driverContext)).iterator(),
+ driverContext
+ );
+ assertThat(seenShards, equalTo(IntStream.range(0, shardCount).boxed().collect(Collectors.toCollection(TreeSet::new))));
+ for (Page p : results) {
+ IntBlock keyBlock = p.getBlock(1);
+ IntVector keys = keyBlock.asVector();
+ for (int i = 0; i < keys.getPositionCount(); i++) {
+ keyCounts.merge(keys.getInt(i), 1, Integer::sum);
+ }
+ }
+ assertThat(keyCounts.keySet(), hasSize(size));
+ for (int k = 0; k < size; k++) {
+ assertThat(keyCounts.get(k), equalTo(shardCount));
+ }
+ } finally {
+ IOUtils.close(closeMe);
+ }
+ }
+
+ protected final List<Page> drive(Operator operator, Iterator<Page> input, DriverContext driverContext) {
+ return drive(List.of(operator), input, driverContext);
+ }
+
+ protected final List<Page> drive(List<Operator> operators, Iterator<Page> input, DriverContext driverContext) {
+ List<Page> results = new ArrayList<>();
+ boolean success = false;
+ try (
+ Driver d = new Driver(
+ driverContext,
+ new CannedSourceOperator(input),
+ operators,
+ new TestResultPageSinkOperator(results::add),
+ () -> {}
+ )
+ ) {
+ runDriver(d);
+ success = true;
+ } finally {
+ if (success == false) {
+ Releasables.closeExpectNoException(Releasables.wrap(() -> Iterators.map(results.iterator(), p -> p::releaseBlocks)));
+ }
+ }
+ return results;
+ }
+
+ public static void runDriver(Driver driver) {
+ runDriver(List.of(driver));
+ }
+
+ public static void runDriver(List<Driver> drivers) {
+ drivers = new ArrayList<>(drivers);
+ int dummyDrivers = between(0, 10);
+ for (int i = 0; i < dummyDrivers; i++) {
+ drivers.add(
+ new Driver(
+ "dummy-session",
+ 0,
+ 0,
+ new DriverContext(BigArrays.NON_RECYCLING_INSTANCE, TestBlockFactory.getNonBreakingInstance()),
+ () -> "dummy-driver",
+ new SequenceLongBlockSourceOperator(
+ TestBlockFactory.getNonBreakingInstance(),
+ LongStream.range(0, between(1, 100)),
+ between(1, 100)
+ ),
+ List.of(),
+ new PageConsumerOperator(Page::releaseBlocks),
+ Driver.DEFAULT_STATUS_INTERVAL,
+ () -> {}
+ )
+ );
+ }
+ Randomness.shuffle(drivers);
+ int numThreads = between(1, 16);
+ ThreadPool threadPool = new TestThreadPool(
+ getTestClass().getSimpleName(),
+ new FixedExecutorBuilder(Settings.EMPTY, "esql", numThreads, 1024, "esql", EsExecutors.TaskTrackingConfig.DEFAULT)
+ );
+ var driverRunner = new DriverRunner(threadPool.getThreadContext()) {
+ @Override
+ protected void start(Driver driver, ActionListener<Void> driverListener) {
+ Driver.start(threadPool.getThreadContext(), threadPool.executor("esql"), driver, between(1, 10000), driverListener);
+ }
+ };
+ PlainActionFuture<Void> future = new PlainActionFuture<>();
+ try {
+ driverRunner.runToCompletion(drivers, future);
+ future.actionGet(TimeValue.timeValueSeconds(30));
+ } finally {
+ terminate(threadPool);
+ }
+ }
+
+ public static void assertDriverContext(DriverContext driverContext) {
+ assertTrue(driverContext.isFinished());
+ assertThat(driverContext.getSnapshot().releasables(), empty());
+ }
+
+ public static int randomPageSize() {
+ if (randomBoolean()) {
+ return between(1, 16);
+ } else {
+ return between(1, 16 * 1024);
+ }
+ }
+
+ /**
+ * This method produces the same converter for all shards, which makes it useful for general type-conversion tests,
+ * but not for union-types tests specifically, since those require a different converter for each shard.
+ */
+ private static BlockLoader getBlockLoaderFor(int shardIdx, MappedFieldType ft, MappedFieldType ftX) {
+ if (shardIdx < 0 || shardIdx >= INDICES.size()) {
+ fail("unexpected shardIdx [" + shardIdx + "]");
+ }
+ BlockLoader blockLoader = ft.blockLoader(blContext());
+ if (ftX != null && ftX.typeName().equals(ft.typeName()) == false) {
+ blockLoader = new TestTypeConvertingBlockLoader(blockLoader, ft.typeName(), ftX.typeName());
+ } else {
+ TestIndexMappingConfig mappingConfig = INDICES.get("index" + (shardIdx + 1));
+ TestFieldType<?> testFieldType = mappingConfig.fieldTypes.get(ft.name());
+ if (testFieldType != null) {
+ blockLoader = new TestTypeConvertingBlockLoader(blockLoader, testFieldType.dataType.typeName(), "keyword");
+ }
+ }
+ return blockLoader;
+ }
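+
+ // Illustrative only (types hypothetical): for an "ip" mapped field with a "keyword" target
+ // type, every shard gets the same ip -> keyword converting loader here, which is why this
+ // helper cannot model per-shard union-type conversions.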
+
+ /**
+ * This method generates shard-specific field information, so that each shard can have its own type and BlockLoader.
+ */
+ private BlockLoader getBlockLoaderFor(int shardIdx, String fieldName, DataType toType) {
+ if (shardIdx < 0 || shardIdx >= INDICES.size()) {
+ fail("unexpected shardIdx [" + shardIdx + "]");
+ }
+ String indexKey = "index" + (shardIdx + 1);
+ TestIndexMappingConfig mappingConfig = INDICES.get(indexKey);
+ TestFieldType<?> testFieldType = mappingConfig.fieldTypes.get(fieldName);
+ if (testFieldType == null) {
+ throw new IllegalArgumentException("Unknown test field: " + fieldName);
+ }
+ MapperService mapper = mapperService(indexKey);
+ MappedFieldType ft = mapper.fieldType(fieldName);
+ BlockLoader blockLoader = ft.blockLoader(blContext());
+ blockLoader = new TestTypeConvertingBlockLoader(blockLoader, testFieldType.dataType.typeName(), toType.typeName());
+ return blockLoader;
+ }
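+
+ // Sketch of the per-shard behaviour: shardIdx 0 resolves against index1's mapping and
+ // shardIdx 1 against index2's, so a field mapped differently in the two indices yields a
+ // different converting loader per shard; when the mapped type already matches toType, the
+ // converter degenerates to the identity (see TestDataTypeConverters.blockConverter below).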
+
+ /**
+ * The implementation of union-types relies on BlockLoader.convert(Block) to convert each block to the correct type
+ * at the point it is read from source, so that the rest of the query deals with a single type for that field.
+ * The real implementation lives in the 'esql' module, so we mock the behaviour here with a simplified subset of its
+ * features.
+ */
+ static class TestTypeConvertingBlockLoader implements BlockLoader {
+ protected final BlockLoader delegate;
+ private final EvalOperator.ExpressionEvaluator convertEvaluator;
+
+ protected TestTypeConvertingBlockLoader(BlockLoader delegate, String fromTypeName, String toTypeName) {
+ this.delegate = delegate;
+ DriverContext driverContext = new DriverContext(
+ BigArrays.NON_RECYCLING_INSTANCE,
+ new org.elasticsearch.compute.data.BlockFactory(
+ new NoopCircuitBreaker(CircuitBreaker.REQUEST),
+ BigArrays.NON_RECYCLING_INSTANCE
+ )
+ );
+ TestBlockConverter blockConverter = TestDataTypeConverters.blockConverter(driverContext, fromTypeName, toTypeName);
+ this.convertEvaluator = new EvalOperator.ExpressionEvaluator() {
+ @Override
+ public org.elasticsearch.compute.data.Block eval(Page page) {
+ org.elasticsearch.compute.data.Block block = page.getBlock(0);
+ return blockConverter.convert(block);
+ }
+
+ @Override
+ public void close() {}
+ };
+ }
+
+ @Override
+ public Builder builder(BlockFactory factory, int expectedCount) {
+ // Return the delegate's builder, which builds the original mapped type, prior to conversion
+ return delegate.builder(factory, expectedCount);
+ }
+
+ @Override
+ public Block convert(Block block) {
+ Page page = new Page((org.elasticsearch.compute.data.Block) block);
+ return convertEvaluator.eval(page);
+ }
+
+ @Override
+ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException {
+ ColumnAtATimeReader reader = delegate.columnAtATimeReader(context);
+ if (reader == null) {
+ return null;
+ }
+ return new ColumnAtATimeReader() {
+ @Override
+ public Block read(BlockFactory factory, Docs docs) throws IOException {
+ Block block = reader.read(factory, docs);
+ Page page = new Page((org.elasticsearch.compute.data.Block) block);
+ return convertEvaluator.eval(page);
+ }
+
+ @Override
+ public boolean canReuse(int startingDocID) {
+ return reader.canReuse(startingDocID);
+ }
+
+ @Override
+ public String toString() {
+ return reader.toString();
+ }
+ };
+ }
+
+ @Override
+ public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
+ // We do no type conversion here, since for row-stride cases the ValuesSourceReaderOperator does it,
+ // using the BlockLoader.convert(Block) method defined above
+ return delegate.rowStrideReader(context);
+ }
+
+ @Override
+ public StoredFieldsSpec rowStrideStoredFieldSpec() {
+ return delegate.rowStrideStoredFieldSpec();
+ }
+
+ @Override
+ public boolean supportsOrdinals() {
+ return delegate.supportsOrdinals();
+ }
+
+ @Override
+ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException {
+ return delegate.ordinals(context);
+ }
+
+ @Override
+ public final String toString() {
+ return "TypeConvertingBlockLoader[delegate=" + delegate + "]";
+ }
+ }
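+
+ // Minimal usage sketch (values illustrative): wrapping a delegate whose shard maps the field
+ // as "long" while the union type resolves to "keyword":
+ //
+ // BlockLoader loader = new TestTypeConvertingBlockLoader(delegate, "long", "keyword");
+ // Block asKeywords = loader.convert(longBlock); // BytesRefBlock of "1", "2", ...
+ //
+ // Column-at-a-time reads convert eagerly inside read(), while row-stride reads return the
+ // original type and rely on the operator invoking convert(Block) afterwards.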
+
+ @FunctionalInterface
+ private interface TestBlockConverter {
+ Block convert(Block block);
+ }
+
+ /**
+ * Blocks that should be converted from some type to a string (keyword) can use this converter.
+ */
+ private abstract static class BlockToStringConverter implements TestBlockConverter {
+ private final DriverContext driverContext;
+
+ BlockToStringConverter(DriverContext driverContext) {
+ this.driverContext = driverContext;
+ }
+
+ @Override
+ public Block convert(Block block) {
+ int positionCount = block.getPositionCount();
+ try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
+ for (int p = 0; p < positionCount; p++) {
+ int valueCount = block.getValueCount(p);
+ int start = block.getFirstValueIndex(p);
+ int end = start + valueCount;
+ boolean positionOpened = false;
+ boolean valuesAppended = false;
+ for (int i = start; i < end; i++) {
+ BytesRef value = evalValue(block, i);
+ if (positionOpened == false && valueCount > 1) {
+ builder.beginPositionEntry();
+ positionOpened = true;
+ }
+ builder.appendBytesRef(value);
+ valuesAppended = true;
+ }
+ if (valuesAppended == false) {
+ builder.appendNull();
+ } else if (positionOpened) {
+ builder.endPositionEntry();
+ }
+ }
+ return builder.build();
+ } finally {
+ block.close();
+ }
+ }
+
+ abstract BytesRef evalValue(Block container, int index);
+ }
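+
+ // For example (values illustrative), a position holding the longs [1, 2] is emitted as the
+ // multi-value keyword entry ["1", "2"], an empty position is appended as null, and the
+ // source block is closed once the converted block has been built.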
+
+ /**
+ * Blocks that should be converted from a string (keyword) to some other type can use this converter.
+ */
+ private abstract static class TestBlockFromStringConverter<T> implements TestBlockConverter {
+ protected final DriverContext driverContext;
+
+ TestBlockFromStringConverter(DriverContext driverContext) {
+ this.driverContext = driverContext;
+ }
+
+ @Override
+ public Block convert(Block b) {
+ BytesRefBlock block = (BytesRefBlock) b;
+ int positionCount = block.getPositionCount();
+ try (Block.Builder builder = blockBuilder(positionCount)) {
+ BytesRef scratchPad = new BytesRef();
+ for (int p = 0; p < positionCount; p++) {
+ int valueCount = block.getValueCount(p);
+ int start = block.getFirstValueIndex(p);
+ int end = start + valueCount;
+ boolean positionOpened = false;
+ boolean valuesAppended = false;
+ for (int i = start; i < end; i++) {
+ T value = evalValue(block, i, scratchPad);
+ if (positionOpened == false && valueCount > 1) {
+ builder.beginPositionEntry();
+ positionOpened = true;
+ }
+ appendValue(builder, value);
+ valuesAppended = true;
+ }
+ if (valuesAppended == false) {
+ builder.appendNull();
+ } else if (positionOpened) {
+ builder.endPositionEntry();
+ }
+ }
+ return builder.build();
+ } finally {
+ b.close();
+ }
+ }
+
+ abstract Block.Builder blockBuilder(int expectedCount);
+
+ abstract void appendValue(Block.Builder builder, T value);
+
+ abstract T evalValue(BytesRefBlock container, int index, BytesRef scratchPad);
+ }
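+
+ // This mirrors the converter above (values illustrative): a keyword entry ["1", "2"] parsed
+ // by a Long-producing subclass becomes the multi-value long entry [1, 2], with nulls and
+ // position boundaries preserved in the same way.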
+
+ private static class TestLongBlockToStringConverter extends BlockToStringConverter {
+ TestLongBlockToStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ BytesRef evalValue(Block container, int index) {
+ return new BytesRef(Long.toString(((LongBlock) container).getLong(index)));
+ }
+ }
+
+ private static class TestLongBlockFromStringConverter extends TestBlockFromStringConverter<Long> {
+ TestLongBlockFromStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ Block.Builder blockBuilder(int expectedCount) {
+ return driverContext.blockFactory().newLongBlockBuilder(expectedCount);
+ }
+
+ @Override
+ Long evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
+ return StringUtils.parseLong(container.getBytesRef(index, scratchPad).utf8ToString());
+ }
+
+ @Override
+ void appendValue(Block.Builder builder, Long value) {
+ ((LongBlock.Builder) builder).appendLong(value);
+ }
+ }
+
+ private static class TestIntegerBlockToStringConverter extends BlockToStringConverter {
+ TestIntegerBlockToStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ BytesRef evalValue(Block container, int index) {
+ return new BytesRef(Integer.toString(((IntBlock) container).getInt(index)));
+ }
+ }
+
+ private static class TestIntegerBlockFromStringConverter extends TestBlockFromStringConverter<Integer> {
+ TestIntegerBlockFromStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ Block.Builder blockBuilder(int expectedCount) {
+ return driverContext.blockFactory().newIntBlockBuilder(expectedCount);
+ }
+
+ @Override
+ Integer evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
+ return (int) StringUtils.parseLong(container.getBytesRef(index, scratchPad).utf8ToString());
+ }
+
+ @Override
+ void appendValue(Block.Builder builder, Integer value) {
+ ((IntBlock.Builder) builder).appendInt(value);
+ }
+ }
+
+ private static class TestBooleanBlockToStringConverter extends BlockToStringConverter {
+
+ TestBooleanBlockToStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ BytesRef evalValue(Block container, int index) {
+ return ((BooleanBlock) container).getBoolean(index) ? new BytesRef("true") : new BytesRef("false");
+ }
+ }
+
+ private static class TestBooleanBlockFromStringConverter extends TestBlockFromStringConverter<Boolean> {
+
+ TestBooleanBlockFromStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ Block.Builder blockBuilder(int expectedCount) {
+ return driverContext.blockFactory().newBooleanBlockBuilder(expectedCount);
+ }
+
+ @Override
+ void appendValue(Block.Builder builder, Boolean value) {
+ ((BooleanBlock.Builder) builder).appendBoolean(value);
+ }
+
+ @Override
+ Boolean evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
+ return Boolean.parseBoolean(container.getBytesRef(index, scratchPad).utf8ToString());
+ }
+ }
+
+ private static class TestDoubleBlockToStringConverter extends BlockToStringConverter {
+
+ TestDoubleBlockToStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ BytesRef evalValue(Block container, int index) {
+ return new BytesRef(Double.toString(((DoubleBlock) container).getDouble(index)));
+ }
+ }
+
+ private static class TestDoubleBlockFromStringConverter extends TestBlockFromStringConverter<Double> {
+
+ TestDoubleBlockFromStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ Block.Builder blockBuilder(int expectedCount) {
+ return driverContext.blockFactory().newDoubleBlockBuilder(expectedCount);
+ }
+
+ @Override
+ void appendValue(Block.Builder builder, Double value) {
+ ((DoubleBlock.Builder) builder).appendDouble(value);
+ }
+
+ @Override
+ Double evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
+ return Double.parseDouble(container.getBytesRef(index, scratchPad).utf8ToString());
+ }
+ }
+
+ /**
+ * Many types are backed by BytesRef blocks but encode their contents in different ways.
+ * For example, the IP type encodes both IPv4 and IPv6 addresses as 16-byte IPv6 binary arrays,
+ * while the KEYWORD type encodes the value as a UTF-8 string, which is typically much shorter
+ * for IP data: "192.168.0.1", for example, is 11 bytes.
+ * Converting blocks between these types involves decoding the BytesRef block from one encoding
+ * and re-encoding it into a BytesRef block with the other.
+ */
+ private abstract static class TestBytesRefToBytesRefConverter extends BlockToStringConverter {
+
+ BytesRef scratchPad = new BytesRef();
+
+ TestBytesRefToBytesRefConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ BytesRef evalValue(Block container, int index) {
+ return convertByteRef(((BytesRefBlock) container).getBytesRef(index, scratchPad));
+ }
+
+ abstract BytesRef convertByteRef(BytesRef bytesRef);
+ }
+
+ private static class TestIPToStringConverter extends TestBytesRefToBytesRefConverter {
+
+ TestIPToStringConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ BytesRef convertByteRef(BytesRef bytesRef) {
+ return new BytesRef(DocValueFormat.IP.format(bytesRef));
+ }
+ }
+
+ private static class TestStringToIPConverter extends TestBytesRefToBytesRefConverter {
+
+ TestStringToIPConverter(DriverContext driverContext) {
+ super(driverContext);
+ }
+
+ @Override
+ BytesRef convertByteRef(BytesRef bytesRef) {
+ return StringUtils.parseIP(bytesRef.utf8ToString());
+ }
+ }
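+
+ // Concretely (sizes illustrative): the keyword form "192.168.0.1" is an 11-byte UTF-8
+ // BytesRef, while the ip form is the fixed 16-byte IPv6-mapped encoding; the converters
+ // above translate between the two via DocValueFormat.IP.format and StringUtils.parseIP.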
+
+ /**
+ * Utility class for creating type-specific converters based on their typeName values.
+ * We do not support all possible combinations, only those that are needed for the tests.
+ * In particular, either the 'from' or the 'to' type must be KEYWORD.
+ */
+ private static class TestDataTypeConverters {
+ public static TestBlockConverter blockConverter(DriverContext driverContext, String fromTypeName, String toTypeName) {
+ if (toTypeName == null || fromTypeName.equals(toTypeName)) {
+ return b -> b;
+ }
+ if (isString(fromTypeName)) {
+ return switch (toTypeName) {
+ case "boolean" -> new TestBooleanBlockFromStringConverter(driverContext);
+ case "short", "integer" -> new TestIntegerBlockFromStringConverter(driverContext);
+ case "long" -> new TestLongBlockFromStringConverter(driverContext);
+ case "double", "float" -> new TestDoubleBlockFromStringConverter(driverContext);
+ case "ip" -> new TestStringToIPConverter(driverContext);
+ default -> throw new UnsupportedOperationException("Conversion from string to " + toTypeName + " is not supported");
+ };
+ }
+ if (isString(toTypeName)) {
+ return switch (fromTypeName) {
+ case "boolean" -> new TestBooleanBlockToStringConverter(driverContext);
+ case "short", "integer" -> new TestIntegerBlockToStringConverter(driverContext);
+ case "long" -> new TestLongBlockToStringConverter(driverContext);
+ case "double", "float" -> new TestDoubleBlockToStringConverter(driverContext);
+ case "ip" -> new TestIPToStringConverter(driverContext);
+ default -> throw new UnsupportedOperationException("Conversion from " + fromTypeName + " to string is not supported");
+ };
+ }
+ throw new UnsupportedOperationException("Conversion from " + fromTypeName + " to " + toTypeName + " is not supported");
+ }
+
+ private static boolean isString(String typeName) {
+ return typeName.equals("keyword") || typeName.equals("text");
+ }
+ }
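+
+ // Selection sketch (ctx stands in for a DriverContext): identical from/to types yield the
+ // identity converter, one string-typed side picks the matching converter above, and pairs
+ // where neither side is a string (e.g. "long" to "ip") throw UnsupportedOperationException.
+ //
+ // TestBlockConverter toKeyword = TestDataTypeConverters.blockConverter(ctx, "long", "keyword");
+ // TestBlockConverter toIp = TestDataTypeConverters.blockConverter(ctx, "keyword", "ip");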
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java
index af3af033efd4c..875058ba6e0e4 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java
@@ -41,7 +41,6 @@
import static org.hamcrest.Matchers.instanceOf;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
public final class CsvAssert {
@@ -110,6 +109,9 @@ private static void assertMetadata(
if (actualType == Type.INTEGER && expectedType == Type.LONG) {
actualType = Type.LONG;
}
+ if (actualType == null) {
+ actualType = Type.NULL;
+ }
assertEquals(
"Different column type for column [" + expectedName + "] (" + expectedType + " != " + actualType + ")",
@@ -188,7 +190,13 @@ public static void assertData(
for (int row = 0; row < expectedValues.size(); row++) {
try {
- assertTrue("Expected more data but no more entries found after [" + row + "]", row < actualValues.size());
+ if (row >= actualValues.size()) {
+ if (dataFailures.isEmpty()) {
+ fail("Expected more data but no more entries found after [" + row + "]");
+ } else {
+ dataFailure(dataFailures, "Expected more data but no more entries found after [" + row + "]\n");
+ }
+ }
if (logger != null) {
logger.info(row(actualValues, row));
@@ -257,7 +265,11 @@ public static void assertData(
}
private static void dataFailure(List<DataFailure> dataFailures) {
- fail("Data mismatch:\n" + dataFailures.stream().map(f -> {
+ dataFailure(dataFailures, "");
+ }
+
+ private static void dataFailure(List<DataFailure> dataFailures, String prefixError) {
+ fail(prefixError + "Data mismatch:\n" + dataFailures.stream().map(f -> {
Description description = new StringDescription();
ListMatcher expected;
if (f.expected instanceof List<?> e) {
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
index 1c1ec3194fef5..ec5770e8ce70b 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java
@@ -57,6 +57,16 @@ public class CsvTestsDataLoader {
private static final TestsDataset LANGUAGES = new TestsDataset("languages", "mapping-languages.json", "languages.csv");
private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs", "mapping-ul_logs.json", "ul_logs.csv");
private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data", "mapping-sample_data.json", "sample_data.csv");
+ private static final TestsDataset SAMPLE_DATA_STR = new TestsDataset(
+ "sample_data_str",
+ "mapping-sample_data_str.json",
+ "sample_data_str.csv"
+ );
+ private static final TestsDataset SAMPLE_DATA_TS_LONG = new TestsDataset(
+ "sample_data_ts_long",
+ "mapping-sample_data_ts_long.json",
+ "sample_data_ts_long.csv"
+ );
private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips", "mapping-clientips.json", "clientips.csv");
private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr", "mapping-client_cidr.json", "client_cidr.csv");
private static final TestsDataset AGES = new TestsDataset("ages", "mapping-ages.json", "ages.csv");
@@ -95,6 +105,8 @@ public class CsvTestsDataLoader {
Map.entry(LANGUAGES.indexName, LANGUAGES),
Map.entry(UL_LOGS.indexName, UL_LOGS),
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
+ Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR),
+ Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG),
Map.entry(CLIENT_IPS.indexName, CLIENT_IPS),
Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR),
Map.entry(AGES.indexName, AGES),
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json
new file mode 100644
index 0000000000000..9e97de8c92928
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_str.json
@@ -0,0 +1,16 @@
+{
+ "properties": {
+ "@timestamp": {
+ "type": "date"
+ },
+ "client_ip": {
+ "type": "keyword"
+ },
+ "event_duration": {
+ "type": "long"
+ },
+ "message": {
+ "type": "keyword"
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json
new file mode 100644
index 0000000000000..ecf21a2a919d0
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-sample_data_ts_long.json
@@ -0,0 +1,16 @@
+{
+ "properties": {
+ "@timestamp": {
+ "type": "long"
+ },
+ "client_ip": {
+ "type": "ip"
+ },
+ "event_duration": {
+ "type": "long"
+ },
+ "message": {
+ "type": "keyword"
+ }
+ }
+}
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv
new file mode 100644
index 0000000000000..bc98671adc7ff
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_str.csv
@@ -0,0 +1,8 @@
+@timestamp:date,client_ip:keyword,event_duration:long,message:keyword
+2023-10-23T13:55:01.543Z,172.21.3.15,1756467,Connected to 10.1.0.1
+2023-10-23T13:53:55.832Z,172.21.3.15,5033755,Connection error
+2023-10-23T13:52:55.015Z,172.21.3.15,8268153,Connection error
+2023-10-23T13:51:54.732Z,172.21.3.15,725448,Connection error
+2023-10-23T13:33:34.937Z,172.21.0.5,1232382,Disconnected
+2023-10-23T12:27:28.948Z,172.21.2.113,2764889,Connected to 10.1.0.2
+2023-10-23T12:15:03.360Z,172.21.2.162,3450233,Connected to 10.1.0.3
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_ts_long.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_ts_long.csv
new file mode 100644
index 0000000000000..2a6add2ea624d
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/sample_data_ts_long.csv
@@ -0,0 +1,8 @@
+@timestamp:long,client_ip:ip,event_duration:long,message:keyword
+1698069301543,172.21.3.15,1756467,Connected to 10.1.0.1
+1698069235832,172.21.3.15,5033755,Connection error
+1698069175015,172.21.3.15,8268153,Connection error
+1698069114732,172.21.3.15,725448,Connection error
+1698068014937,172.21.0.5,1232382,Disconnected
+1698064048948,172.21.2.113,2764889,Connected to 10.1.0.2
+1698063303360,172.21.2.162,3450233,Connected to 10.1.0.3
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec
new file mode 100644
index 0000000000000..ee8c4be385e0f
--- /dev/null
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec
@@ -0,0 +1,719 @@
+singleIndexIp
+FROM sample_data
+| EVAL client_ip = TO_IP(client_ip)
+| KEEP @timestamp, client_ip, event_duration, message
+| SORT @timestamp DESC
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+singleIndexWhereIpLike
+FROM sample_data
+| WHERE TO_STRING(client_ip) LIKE "172.21.2.*"
+| KEEP @timestamp, event_duration, message
+| SORT @timestamp DESC
+;
+
+@timestamp:date | event_duration:long | message:keyword
+2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2
+2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3
+;
+
+singleIndexTsLong
+FROM sample_data_ts_long
+| EVAL @timestamp = TO_DATETIME(@timestamp)
+| KEEP @timestamp, client_ip, event_duration, message
+| SORT @timestamp DESC
+;
+
+@timestamp:date | client_ip:ip | event_duration:long | message:keyword
+2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+singleIndexIpStats
+FROM sample_data
+| EVAL client_ip = TO_IP(client_ip)
+| STATS count=count(*) BY client_ip
+| SORT count DESC, client_ip ASC
+| KEEP count, client_ip
+;
+
+count:long | client_ip:ip
+4 | 172.21.3.15
+1 | 172.21.0.5
+1 | 172.21.2.113
+1 | 172.21.2.162
+;
+
+singleIndexIpStringStats
+FROM sample_data_str
+| EVAL client_ip = TO_IP(client_ip)
+| STATS count=count(*) BY client_ip
+| SORT count DESC, client_ip ASC
+| KEEP count, client_ip
+;
+
+count:long | client_ip:ip
+4 | 172.21.3.15
+1 | 172.21.0.5
+1 | 172.21.2.113
+1 | 172.21.2.162
+;
+
+multiIndexIpString
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_str METADATA _index
+| EVAL client_ip = TO_IP(client_ip)
+| KEEP _index, @timestamp, client_ip, event_duration, message
+| SORT _index ASC, @timestamp DESC
+;
+
+_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexIpStringRename
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_str METADATA _index
+| EVAL host_ip = TO_IP(client_ip)
+| KEEP _index, @timestamp, host_ip, event_duration, message
+| SORT _index ASC, @timestamp DESC
+;
+
+_index:keyword | @timestamp:date | host_ip:ip | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexIpStringRenameToString
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_str METADATA _index
+| EVAL host_ip = TO_STRING(TO_IP(client_ip))
+| KEEP _index, @timestamp, host_ip, event_duration, message
+| SORT _index ASC, @timestamp DESC
+;
+
+_index:keyword | @timestamp:date | host_ip:keyword | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexWhereIpString
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_str METADATA _index
+| WHERE STARTS_WITH(TO_STRING(client_ip), "172.21.2")
+| KEEP _index, @timestamp, event_duration, message
+| SORT _index ASC, @timestamp DESC
+;
+
+_index:keyword | @timestamp:date | event_duration:long | message:keyword
+sample_data | 2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3
+sample_data_str | 2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2
+sample_data_str | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexWhereIpStringLike
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_str METADATA _index
+| WHERE TO_STRING(client_ip) LIKE "172.21.2.*"
+| KEEP _index, @timestamp, event_duration, message
+| SORT _index ASC, @timestamp DESC
+;
+
+_index:keyword | @timestamp:date | event_duration:long | message:keyword
+sample_data | 2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3
+sample_data_str | 2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2
+sample_data_str | 2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexIpStringStats
+required_capability: union_types
+
+FROM sample_data, sample_data_str
+| EVAL client_ip = TO_IP(client_ip)
+| STATS count=count(*) BY client_ip
+| SORT count DESC, client_ip ASC
+| KEEP count, client_ip
+;
+
+count:long | client_ip:ip
+8 | 172.21.3.15
+2 | 172.21.0.5
+2 | 172.21.2.113
+2 | 172.21.2.162
+;
+
+multiIndexIpStringRenameStats
+required_capability: union_types
+
+FROM sample_data, sample_data_str
+| EVAL host_ip = TO_IP(client_ip)
+| STATS count=count(*) BY host_ip
+| SORT count DESC, host_ip ASC
+| KEEP count, host_ip
+;
+
+count:long | host_ip:ip
+8 | 172.21.3.15
+2 | 172.21.0.5
+2 | 172.21.2.113
+2 | 172.21.2.162
+;
+
+multiIndexIpStringRenameToStringStats
+required_capability: union_types
+
+FROM sample_data, sample_data_str
+| EVAL host_ip = TO_STRING(TO_IP(client_ip))
+| STATS count=count(*) BY host_ip
+| SORT count DESC, host_ip ASC
+| KEEP count, host_ip
+;
+
+count:long | host_ip:keyword
+8 | 172.21.3.15
+2 | 172.21.0.5
+2 | 172.21.2.113
+2 | 172.21.2.162
+;
+
+multiIndexIpStringStatsInline
+required_capability: union_types
+required_capability: union_types_inline_fix
+
+FROM sample_data, sample_data_str
+| STATS count=count(*) BY client_ip = TO_IP(client_ip)
+| SORT count DESC, client_ip ASC
+| KEEP count, client_ip
+;
+
+count:long | client_ip:ip
+8 | 172.21.3.15
+2 | 172.21.0.5
+2 | 172.21.2.113
+2 | 172.21.2.162
+;
+
+multiIndexWhereIpStringStats
+required_capability: union_types
+
+FROM sample_data, sample_data_str
+| WHERE STARTS_WITH(TO_STRING(client_ip), "172.21.2")
+| STATS count=count(*) BY message
+| SORT count DESC, message ASC
+| KEEP count, message
+;
+
+count:long | message:keyword
+2 | Connected to 10.1.0.2
+2 | Connected to 10.1.0.3
+;
+
+multiIndexTsLong
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_ts_long METADATA _index
+| EVAL @timestamp = TO_DATETIME(@timestamp)
+| KEEP _index, @timestamp, client_ip, event_duration, message
+| SORT _index ASC, @timestamp DESC
+;
+
+_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexTsLongRename
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_ts_long METADATA _index
+| EVAL ts = TO_DATETIME(@timestamp)
+| KEEP _index, ts, client_ip, event_duration, message
+| SORT _index ASC, ts DESC
+;
+
+_index:keyword | ts:date | client_ip:ip | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexTsLongRenameToString
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_ts_long METADATA _index
+| EVAL ts = TO_STRING(TO_DATETIME(@timestamp))
+| KEEP _index, ts, client_ip, event_duration, message
+| SORT _index ASC, ts DESC
+;
+
+_index:keyword | ts:keyword | client_ip:ip | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexWhereTsLong
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data, sample_data_ts_long METADATA _index
+| WHERE TO_LONG(@timestamp) < 1698068014937
+| KEEP _index, client_ip, event_duration, message
+| SORT _index ASC, client_ip ASC
+;
+
+_index:keyword | client_ip:ip | event_duration:long | message:keyword
+sample_data | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexTsLongStats
+required_capability: union_types
+
+FROM sample_data, sample_data_ts_long
+| EVAL @timestamp = DATE_TRUNC(1 hour, TO_DATETIME(@timestamp))
+| STATS count=count(*) BY @timestamp
+| SORT count DESC, @timestamp ASC
+| KEEP count, @timestamp
+;
+
+count:long | @timestamp:date
+10 | 2023-10-23T13:00:00.000Z
+4 | 2023-10-23T12:00:00.000Z
+;
+
+multiIndexTsLongRenameStats
+required_capability: union_types
+
+FROM sample_data, sample_data_ts_long
+| EVAL hour = DATE_TRUNC(1 hour, TO_DATETIME(@timestamp))
+| STATS count=count(*) BY hour
+| SORT count DESC, hour ASC
+| KEEP count, hour
+;
+
+count:long | hour:date
+10 | 2023-10-23T13:00:00.000Z
+4 | 2023-10-23T12:00:00.000Z
+;
+
+multiIndexTsLongRenameToDatetimeToStringStats
+required_capability: union_types
+
+FROM sample_data, sample_data_ts_long
+| EVAL hour = LEFT(TO_STRING(TO_DATETIME(@timestamp)), 13)
+| STATS count=count(*) BY hour
+| SORT count DESC, hour ASC
+| KEEP count, hour
+;
+
+count:long | hour:keyword
+10 | 2023-10-23T13
+4 | 2023-10-23T12
+;
+
+multiIndexTsLongRenameToStringStats
+required_capability: union_types
+
+FROM sample_data, sample_data_ts_long
+| EVAL mess = LEFT(TO_STRING(@timestamp), 7)
+| STATS count=count(*) BY mess
+| SORT count DESC, mess DESC
+| KEEP count, mess
+;
+
+count:long | mess:keyword
+7 | 2023-10
+4 | 1698069
+1 | 1698068
+1 | 1698064
+1 | 1698063
+;
+
+multiIndexTsLongStatsInline
+required_capability: union_types
+
+FROM sample_data, sample_data_ts_long
+| STATS count=COUNT(*), max=MAX(TO_DATETIME(@timestamp))
+| KEEP count, max
+;
+
+count:long | max:date
+14 | 2023-10-23T13:55:01.543Z
+;
+
+multiIndexTsLongStatsInlineDropped
+required_capability: union_types
+
+FROM sample_data, sample_data_ts_long
+| STATS count=COUNT(*), max=MAX(TO_DATETIME(@timestamp))
+| KEEP count
+;
+
+count:long
+14
+;
+
+multiIndexWhereTsLongStats
+required_capability: union_types
+
+FROM sample_data, sample_data_ts_long
+| WHERE TO_LONG(@timestamp) < 1698068014937
+| STATS count=count(*) BY message
+| SORT count DESC, message ASC
+| KEEP count, message
+;
+
+count:long | message:keyword
+2 | Connected to 10.1.0.2
+2 | Connected to 10.1.0.3
+;
+
+multiIndexIpStringTsLong
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data* METADATA _index
+| EVAL @timestamp = TO_DATETIME(@timestamp), client_ip = TO_IP(client_ip)
+| KEEP _index, @timestamp, client_ip, event_duration, message
+| SORT _index ASC, @timestamp DESC
+;
+
+_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexIpStringTsLongDropped
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data* METADATA _index
+| EVAL @timestamp = TO_DATETIME(@timestamp), client_ip = TO_IP(client_ip)
+| KEEP _index, event_duration, message
+| SORT _index ASC, event_duration ASC
+;
+
+_index:keyword | event_duration:long | message:keyword
+sample_data | 725448 | Connection error
+sample_data | 1232382 | Disconnected
+sample_data | 1756467 | Connected to 10.1.0.1
+sample_data | 2764889 | Connected to 10.1.0.2
+sample_data | 3450233 | Connected to 10.1.0.3
+sample_data | 5033755 | Connection error
+sample_data | 8268153 | Connection error
+sample_data_str | 725448 | Connection error
+sample_data_str | 1232382 | Disconnected
+sample_data_str | 1756467 | Connected to 10.1.0.1
+sample_data_str | 2764889 | Connected to 10.1.0.2
+sample_data_str | 3450233 | Connected to 10.1.0.3
+sample_data_str | 5033755 | Connection error
+sample_data_str | 8268153 | Connection error
+sample_data_ts_long | 725448 | Connection error
+sample_data_ts_long | 1232382 | Disconnected
+sample_data_ts_long | 1756467 | Connected to 10.1.0.1
+sample_data_ts_long | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 5033755 | Connection error
+sample_data_ts_long | 8268153 | Connection error
+;
+
+multiIndexIpStringTsLongRename
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data* METADATA _index
+| EVAL ts = TO_DATETIME(@timestamp), host_ip = TO_IP(client_ip)
+| KEEP _index, ts, host_ip, event_duration, message
+| SORT _index ASC, ts DESC
+;
+
+_index:keyword | ts:date | host_ip:ip | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexIpStringTsLongRenameDropped
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data* METADATA _index
+| EVAL ts = TO_DATETIME(@timestamp), host_ip = TO_IP(client_ip)
+| KEEP _index, event_duration, message
+| SORT _index ASC, event_duration ASC
+;
+
+_index:keyword | event_duration:long | message:keyword
+sample_data | 725448 | Connection error
+sample_data | 1232382 | Disconnected
+sample_data | 1756467 | Connected to 10.1.0.1
+sample_data | 2764889 | Connected to 10.1.0.2
+sample_data | 3450233 | Connected to 10.1.0.3
+sample_data | 5033755 | Connection error
+sample_data | 8268153 | Connection error
+sample_data_str | 725448 | Connection error
+sample_data_str | 1232382 | Disconnected
+sample_data_str | 1756467 | Connected to 10.1.0.1
+sample_data_str | 2764889 | Connected to 10.1.0.2
+sample_data_str | 3450233 | Connected to 10.1.0.3
+sample_data_str | 5033755 | Connection error
+sample_data_str | 8268153 | Connection error
+sample_data_ts_long | 725448 | Connection error
+sample_data_ts_long | 1232382 | Disconnected
+sample_data_ts_long | 1756467 | Connected to 10.1.0.1
+sample_data_ts_long | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 5033755 | Connection error
+sample_data_ts_long | 8268153 | Connection error
+;
+
+multiIndexIpStringTsLongRenameToString
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data* METADATA _index
+| EVAL ts = TO_STRING(TO_DATETIME(@timestamp)), host_ip = TO_STRING(TO_IP(client_ip))
+| KEEP _index, ts, host_ip, event_duration, message
+| SORT _index ASC, ts DESC
+;
+
+_index:keyword | ts:keyword | host_ip:keyword | event_duration:long | message:keyword
+sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_str | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_str | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_str | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_str | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_str | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_str | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_str | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1
+sample_data_ts_long | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error
+sample_data_ts_long | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error
+sample_data_ts_long | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error
+sample_data_ts_long | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected
+sample_data_ts_long | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2
+sample_data_ts_long | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexWhereIpStringTsLong
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data* METADATA _index
+| WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) == "172.21.2.162"
+| KEEP _index, event_duration, message
+| SORT _index ASC, message ASC
+;
+
+_index:keyword | event_duration:long | message:keyword
+sample_data | 3450233 | Connected to 10.1.0.3
+sample_data_str | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexWhereIpStringTsLongStats
+required_capability: union_types
+
+FROM sample_data*
+| WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) == "172.21.2.162"
+| STATS count=count(*) BY message
+| SORT count DESC, message ASC
+| KEEP count, message
+;
+
+count:long | message:keyword
+3 | Connected to 10.1.0.3
+;
+
+multiIndexWhereIpStringLikeTsLong
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data* METADATA _index
+| WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) LIKE "172.21.2.16?"
+| KEEP _index, event_duration, message
+| SORT _index ASC, message ASC
+;
+
+_index:keyword | event_duration:long | message:keyword
+sample_data | 3450233 | Connected to 10.1.0.3
+sample_data_str | 3450233 | Connected to 10.1.0.3
+sample_data_ts_long | 3450233 | Connected to 10.1.0.3
+;
+
+multiIndexWhereIpStringLikeTsLongStats
+required_capability: union_types
+
+FROM sample_data*
+| WHERE TO_LONG(@timestamp) < 1698068014937 AND TO_STRING(client_ip) LIKE "172.21.2.16?"
+| STATS count=count(*) BY message
+| SORT count DESC, message ASC
+| KEEP count, message
+;
+
+count:long | message:keyword
+3 | Connected to 10.1.0.3
+;
+
+multiIndexMultiColumnTypesRename
+required_capability: union_types
+required_capability: metadata_fields
+
+FROM sample_data* METADATA _index
+| WHERE event_duration > 8000000
+| EVAL ts = TO_DATETIME(@timestamp), ts_str = TO_STRING(@timestamp), ts_l = TO_LONG(@timestamp), ip = TO_IP(client_ip), ip_str = TO_STRING(client_ip)
+| SORT _index ASC, ts DESC
+;
+
+@timestamp:null | client_ip:null | event_duration:long | message:keyword | _index:keyword | ts:date | ts_str:keyword | ts_l:long | ip:ip | ip_str:keyword
+null | null | 8268153 | Connection error | sample_data | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15
+null | null | 8268153 | Connection error | sample_data_str | 2023-10-23T13:52:55.015Z | 2023-10-23T13:52:55.015Z | 1698069175015 | 172.21.3.15 | 172.21.3.15
+null | null | 8268153 | Connection error | sample_data_ts_long | 2023-10-23T13:52:55.015Z | 1698069175015 | 1698069175015 | 172.21.3.15 | 172.21.3.15
+;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
index e65f574422dd5..654c1ffd8a5e9 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -77,6 +77,11 @@ public class EsqlCapabilities {
*/
public static final String STRING_LITERAL_AUTO_CASTING_TO_DATETIME_ADD_SUB = "string_literal_auto_casting_to_datetime_add_sub";
+ /**
+     * Support multiple field mappings if an appropriate conversion function is used (union types)
+ */
+ public static final String UNION_TYPES = "union_types";
+
/**
* Support for named or positional parameters in EsqlQueryRequest.
*/
@@ -94,6 +99,7 @@ private static Set<String> capabilities() {
caps.add(METADATA_IGNORED_FIELD);
caps.add(FN_MV_APPEND);
caps.add(REPEAT);
+ caps.add(UNION_TYPES);
caps.add(NAMED_POSITIONAL_PARAMETER);
if (Build.current().isSnapshot()) {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java
index 70fbe17a7d470..77a51c8415545 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java
@@ -27,6 +27,7 @@
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
+import org.elasticsearch.xpack.esql.core.expression.NameId;
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
import org.elasticsearch.xpack.esql.core.expression.Nullability;
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
@@ -59,6 +60,7 @@
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.DateTimeArithmeticOperation;
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;
@@ -80,11 +82,13 @@
import org.elasticsearch.xpack.esql.stats.FeatureMetric;
import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter;
import org.elasticsearch.xpack.esql.type.EsqlDataTypes;
+import org.elasticsearch.xpack.esql.type.MultiTypeEsField;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
+import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
@@ -132,8 +136,13 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerContext> {
-        var resolution = new Batch<>("Resolution", new ResolveRefs(), new ImplicitCasting());
- var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit());
+ var resolution = new Batch<>(
+ "Resolution",
+ new ResolveRefs(),
+ new ResolveUnionTypes(), // Must be after ResolveRefs, so union types can be found
+ new ImplicitCasting()
+ );
+ var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnresolveUnionTypes());
rules = List.of(init, resolution, finish);
}
@@ -851,14 +860,6 @@ private static List<Attribute> potentialCandidatesIfNoMatchesFound(
}
private static Attribute handleSpecialFields(UnresolvedAttribute u, Attribute named) {
- if (named instanceof FieldAttribute fa) {
- // incompatible mappings
- var field = fa.field();
- if (field instanceof InvalidMappedField imf) {
- named = u.withUnresolvedMessage("Cannot use field [" + fa.name() + "] due to ambiguities being " + imf.errorMessage());
- }
- }
-
return named.withLocation(u.source());
}
@@ -1061,4 +1062,155 @@ public static Expression castStringLiteral(Expression from, DataType target) {
}
}
}
+
+ /**
+ * The EsqlIndexResolver will create InvalidMappedField instances for fields that are ambiguous (i.e. have multiple mappings).
+ * During ResolveRefs we do not convert these to UnresolvedAttribute instances, as we want to first determine if they can
+ * instead be handled by conversion functions within the query. This rule looks for matching conversion functions and converts
+ * those fields into MultiTypeEsField, which encapsulates the knowledge of how to convert these into a single type.
+ * This knowledge will be used later in generating the FieldExtractExec with built-in type conversion.
+ * Any fields which could not be resolved by conversion functions will be converted to UnresolvedAttribute instances in a later rule
+ * (See UnresolveUnionTypes below).
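+     * For example, if {@code @timestamp} is mapped as {@code date} in one index and as {@code long} in another
+     * (as in the union_types csv-spec tests above), an expression like {@code TO_DATETIME(@timestamp)} resolves the
+     * conflict, because the conversion function supports both mapped types.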
+ */
+ private static class ResolveUnionTypes extends BaseAnalyzerRule {
+
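+        // Identifies a single resolution: one field name together with one of its conflicting mapped types
+        // (e.g. "@timestamp" as LONG), used to collect the type-specific conversion expressions below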
+ record TypeResolutionKey(String fieldName, DataType fieldType) {}
+
+ @Override
+ protected LogicalPlan doRule(LogicalPlan plan) {
+            List<FieldAttribute> unionFieldAttributes = new ArrayList<>();
+ // See if the eval function has an unresolved MultiTypeEsField field
+ // Replace the entire convert function with a new FieldAttribute (containing type conversion knowledge)
+ plan = plan.transformExpressionsOnly(
+ AbstractConvertFunction.class,
+ convert -> resolveConvertFunction(convert, unionFieldAttributes)
+ );
+ // If no union fields were generated, return the plan as is
+ if (unionFieldAttributes.isEmpty()) {
+ return plan;
+ }
+
+ // Otherwise drop the converted attributes after the alias function, as they are only needed for this function, and
+ // the original version of the attribute should still be seen as unconverted.
+ plan = dropConvertedAttributes(plan, unionFieldAttributes);
+
+ // And add generated fields to EsRelation, so these new attributes will appear in the OutputExec of the Fragment
+ // and thereby get used in FieldExtractExec
+ plan = plan.transformDown(EsRelation.class, esr -> {
+                List<Attribute> output = esr.output();
+                List<Attribute> missing = new ArrayList<>();
+ for (FieldAttribute fa : unionFieldAttributes) {
+ if (output.stream().noneMatch(a -> a.id().equals(fa.id()))) {
+ missing.add(fa);
+ }
+ }
+ if (missing.isEmpty() == false) {
+ output.addAll(missing);
+ return new EsRelation(esr.source(), esr.index(), output, esr.indexMode(), esr.frozen());
+ }
+ return esr;
+ });
+ return plan;
+ }
+
+        private LogicalPlan dropConvertedAttributes(LogicalPlan plan, List<FieldAttribute> unionFieldAttributes) {
+            List<NamedExpression> projections = new ArrayList<>(plan.output());
+ for (var e : unionFieldAttributes) {
+ projections.removeIf(p -> p.id().equals(e.id()));
+ }
+ if (projections.size() != plan.output().size()) {
+ return new EsqlProject(plan.source(), plan, projections);
+ }
+ return plan;
+ }
+
+        private Expression resolveConvertFunction(AbstractConvertFunction convert, List<FieldAttribute> unionFieldAttributes) {
+ if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) {
+                HashMap<TypeResolutionKey, Expression> typeResolutions = new HashMap<>();
+                Set<DataType> supportedTypes = convert.supportedTypes();
+ imf.getTypesToIndices().keySet().forEach(typeName -> {
+ DataType type = DataType.fromTypeName(typeName);
+ if (supportedTypes.contains(type)) {
+ TypeResolutionKey key = new TypeResolutionKey(fa.name(), type);
+ var concreteConvert = typeSpecificConvert(convert, fa.source(), type, imf);
+ typeResolutions.put(key, concreteConvert);
+ }
+ });
+ // If all mapped types were resolved, create a new FieldAttribute with the resolved MultiTypeEsField
+ if (typeResolutions.size() == imf.getTypesToIndices().size()) {
+ var resolvedField = resolvedMultiTypeEsField(fa, typeResolutions);
+ return createIfDoesNotAlreadyExist(fa, resolvedField, unionFieldAttributes);
+ }
+ } else if (convert.field() instanceof AbstractConvertFunction subConvert) {
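+                // Nested conversions, e.g. TO_STRING(TO_DATETIME(@timestamp)), are resolved recursively from the inside out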
+ return convert.replaceChildren(Collections.singletonList(resolveConvertFunction(subConvert, unionFieldAttributes)));
+ }
+ return convert;
+ }
+
+ private Expression createIfDoesNotAlreadyExist(
+ FieldAttribute fa,
+ MultiTypeEsField resolvedField,
+            List<FieldAttribute> unionFieldAttributes
+ ) {
+ var unionFieldAttribute = new FieldAttribute(fa.source(), fa.name(), resolvedField); // Generates new ID for the field
+ int existingIndex = unionFieldAttributes.indexOf(unionFieldAttribute);
+ if (existingIndex >= 0) {
+ // Do not generate multiple name/type combinations with different IDs
+ return unionFieldAttributes.get(existingIndex);
+ } else {
+ unionFieldAttributes.add(unionFieldAttribute);
+ return unionFieldAttribute;
+ }
+ }
+
+        private MultiTypeEsField resolvedMultiTypeEsField(FieldAttribute fa, HashMap<TypeResolutionKey, Expression> typeResolutions) {
+            Map<String, Expression> typesToConversionExpressions = new HashMap<>();
+ InvalidMappedField imf = (InvalidMappedField) fa.field();
+ imf.getTypesToIndices().forEach((typeName, indexNames) -> {
+ DataType type = DataType.fromTypeName(typeName);
+ TypeResolutionKey key = new TypeResolutionKey(fa.name(), type);
+ if (typeResolutions.containsKey(key)) {
+ typesToConversionExpressions.put(typeName, typeResolutions.get(key));
+ }
+ });
+ return MultiTypeEsField.resolveFrom(imf, typesToConversionExpressions);
+ }
+
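+        /**
+         * Make a copy of the conversion function in which the union-typed field is replaced by a FieldAttribute of a
+         * single concrete mapped type, re-using the original attribute's NameId so the plan still refers to one field.
+         */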
+ private Expression typeSpecificConvert(AbstractConvertFunction convert, Source source, DataType type, InvalidMappedField mtf) {
+ EsField field = new EsField(mtf.getName(), type, mtf.getProperties(), mtf.isAggregatable());
+ NameId id = ((FieldAttribute) convert.field()).id();
+ FieldAttribute resolvedAttr = new FieldAttribute(source, null, field.getName(), field, null, Nullability.TRUE, id, false);
+ return convert.replaceChildren(Collections.singletonList(resolvedAttr));
+ }
+ }
+
+ /**
+     * If no AbstractConvertFunction resolved the multi-typed fields in the ResolveUnionTypes rule,
+     * then there could still be some FieldAttributes that contain unresolved MultiTypeEsFields.
+     * These need to be converted back to actual UnresolvedAttribute instances so that validation can generate appropriate failures.
+ */
+    private static class UnresolveUnionTypes extends AnalyzerRules.AnalyzerRule<LogicalPlan> {
+ @Override
+ protected boolean skipResolved() {
+ return false;
+ }
+
+ @Override
+ protected LogicalPlan rule(LogicalPlan plan) {
+ if (plan instanceof EsRelation esRelation) {
+ // Leave esRelation as InvalidMappedField so that UNSUPPORTED fields can still pass through
+ return esRelation;
+ }
+ return plan.transformExpressionsOnly(FieldAttribute.class, UnresolveUnionTypes::checkUnresolved);
+ }
+
+ private static Attribute checkUnresolved(FieldAttribute fa) {
+ var field = fa.field();
+ if (field instanceof InvalidMappedField imf) {
+ String unresolvedMessage = "Cannot use field [" + fa.name() + "] due to ambiguities being " + imf.errorMessage();
+ return new UnresolvedAttribute(fa.source(), fa.name(), fa.qualifier(), fa.id(), unresolvedMessage, null);
+ }
+ return fa;
+ }
+ }
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java
index 2496d8b82fa6f..96601905d40c9 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/AbstractConvertFunction.java
@@ -77,7 +77,11 @@ protected final TypeResolution resolveType() {
if (childrenResolved() == false) {
return new TypeResolution("Unresolved children");
}
- return isType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(factories().keySet()));
+ return isType(field(), factories()::containsKey, sourceText(), null, supportedTypesNames(supportedTypes()));
+ }
+
+    public Set<DataType> supportedTypes() {
+ return factories().keySet();
}
public static String supportedTypesNames(Set<DataType> types) {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java
index b7e4fc9ae622f..08916c14e91bf 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/EsRelation.java
@@ -92,6 +92,8 @@ public List<Attribute> output() {
@Override
public boolean expressionsResolved() {
+        // It is fine for unresolved expressions to exist in EsRelation, as long as they are not used in later operations.
+        // This allows them to be converted to null@unsupported fields in the final output, an important feature of ES|QL.
return true;
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
index 04ed433200c2f..fdba785f668d7 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java
@@ -7,20 +7,28 @@
package org.elasticsearch.xpack.esql.planner;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+import org.elasticsearch.common.breaker.CircuitBreaker;
+import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.logging.HeaderWarning;
+import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.compute.aggregation.GroupingAggregator;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.ElementType;
+import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.lucene.LuceneCountOperator;
import org.elasticsearch.compute.lucene.LuceneOperator;
import org.elasticsearch.compute.lucene.LuceneSourceOperator;
import org.elasticsearch.compute.lucene.LuceneTopNSourceOperator;
import org.elasticsearch.compute.lucene.TimeSeriesSortedSourceOperatorFactory;
import org.elasticsearch.compute.lucene.ValuesSourceReaderOperator;
+import org.elasticsearch.compute.operator.DriverContext;
+import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.compute.operator.Operator;
import org.elasticsearch.compute.operator.OrdinalsGroupingOperator;
import org.elasticsearch.compute.operator.SourceOperator;
@@ -35,13 +43,16 @@
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.search.NestedHelper;
+import org.elasticsearch.search.fetch.StoredFieldsSpec;
import org.elasticsearch.search.internal.AliasFilter;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.sort.SortAndFormats;
import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
import org.elasticsearch.xpack.esql.plan.physical.AggregateExec;
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec;
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec.FieldSort;
@@ -50,6 +61,7 @@
import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.LocalExecutionPlannerContext;
import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.PhysicalOperation;
import org.elasticsearch.xpack.esql.type.EsqlDataTypes;
+import org.elasticsearch.xpack.esql.type.MultiTypeEsField;
import java.io.IOException;
import java.util.ArrayList;
@@ -102,17 +114,42 @@ public final PhysicalOperation fieldExtractPhysicalOperation(FieldExtractExec fi
var docValuesAttrs = fieldExtractExec.docValuesAttributes();
for (Attribute attr : fieldExtractExec.attributesToExtract()) {
layout.append(attr);
+ var unionTypes = findUnionTypes(attr);
DataType dataType = attr.dataType();
MappedFieldType.FieldExtractPreference fieldExtractPreference = PlannerUtils.extractPreference(docValuesAttrs.contains(attr));
ElementType elementType = PlannerUtils.toElementType(dataType, fieldExtractPreference);
String fieldName = attr.name();
boolean isUnsupported = EsqlDataTypes.isUnsupported(dataType);
-            IntFunction<BlockLoader> loader = s -> shardContexts.get(s).blockLoader(fieldName, isUnsupported, fieldExtractPreference);
+            IntFunction<BlockLoader> loader = s -> getBlockLoaderFor(s, fieldName, isUnsupported, fieldExtractPreference, unionTypes);
fields.add(new ValuesSourceReaderOperator.FieldInfo(fieldName, elementType, loader));
}
return source.with(new ValuesSourceReaderOperator.Factory(fields, readers, docChannel), layout.build());
}
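+
+    // For union-typed fields, wrap the standard BlockLoader in one that converts the loaded blocks using the
+    // conversion expression registered for this shard's index, so extraction yields the resolved type directly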
+ private BlockLoader getBlockLoaderFor(
+ int shardId,
+ String fieldName,
+ boolean isUnsupported,
+ MappedFieldType.FieldExtractPreference fieldExtractPreference,
+ MultiTypeEsField unionTypes
+ ) {
+ DefaultShardContext shardContext = (DefaultShardContext) shardContexts.get(shardId);
+ BlockLoader blockLoader = shardContext.blockLoader(fieldName, isUnsupported, fieldExtractPreference);
+ if (unionTypes != null) {
+ String indexName = shardContext.ctx.index().getName();
+ Expression conversion = unionTypes.getConversionExpressionForIndex(indexName);
+ return new TypeConvertingBlockLoader(blockLoader, (AbstractConvertFunction) conversion);
+ }
+ return blockLoader;
+ }
+
+ private MultiTypeEsField findUnionTypes(Attribute attr) {
+ if (attr instanceof FieldAttribute fa && fa.field() instanceof MultiTypeEsField multiTypeEsField) {
+ return multiTypeEsField;
+ }
+ return null;
+ }
+
public Function querySupplier(QueryBuilder builder) {
QueryBuilder qb = builder == null ? QueryBuilders.matchAllQuery() : builder;
return ctx -> shardContexts.get(ctx.index()).toQuery(qb);
@@ -321,4 +358,96 @@ public FieldNamesFieldMapper.FieldNamesFieldType fieldNames() {
return loader;
}
}
+
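+    /**
+     * A BlockLoader that delegates the actual loading to the loader for the index's original mapped type, and then
+     * immediately converts the loaded blocks with the evaluator of the matching conversion function (e.g. TO_LONG),
+     * so that data nodes extract union-typed fields already converted to their single resolved type.
+     */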
+ static class TypeConvertingBlockLoader implements BlockLoader {
+ protected final BlockLoader delegate;
+ private final EvalOperator.ExpressionEvaluator convertEvaluator;
+
+ protected TypeConvertingBlockLoader(BlockLoader delegate, AbstractConvertFunction convertFunction) {
+ this.delegate = delegate;
+ DriverContext driverContext1 = new DriverContext(
+ BigArrays.NON_RECYCLING_INSTANCE,
+ new org.elasticsearch.compute.data.BlockFactory(
+ new NoopCircuitBreaker(CircuitBreaker.REQUEST),
+ BigArrays.NON_RECYCLING_INSTANCE
+ )
+ );
+ this.convertEvaluator = convertFunction.toEvaluator(e -> driverContext -> new EvalOperator.ExpressionEvaluator() {
+ @Override
+ public org.elasticsearch.compute.data.Block eval(Page page) {
+ // This is a pass-through evaluator, since it sits directly on the source loading (no prior expressions)
+ return page.getBlock(0);
+ }
+
+ @Override
+ public void close() {}
+ }).get(driverContext1);
+ }
+
+ @Override
+ public Builder builder(BlockFactory factory, int expectedCount) {
+            // Return the delegate's builder, which builds the original mapped type, before conversion
+ return delegate.builder(factory, expectedCount);
+ }
+
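+        // Used on the row-stride path: the ValuesSourceReaderOperator builds blocks with the delegate's builder and
+        // then calls convert(Block) to apply the type conversion after loading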
+ @Override
+ public Block convert(Block block) {
+ Page page = new Page((org.elasticsearch.compute.data.Block) block);
+ return convertEvaluator.eval(page);
+ }
+
+ @Override
+ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException {
+ ColumnAtATimeReader reader = delegate.columnAtATimeReader(context);
+ if (reader == null) {
+ return null;
+ }
+ return new ColumnAtATimeReader() {
+ @Override
+ public Block read(BlockFactory factory, Docs docs) throws IOException {
+ Block block = reader.read(factory, docs);
+ Page page = new Page((org.elasticsearch.compute.data.Block) block);
+ org.elasticsearch.compute.data.Block converted = convertEvaluator.eval(page);
+ return converted;
+ }
+
+ @Override
+ public boolean canReuse(int startingDocID) {
+ return reader.canReuse(startingDocID);
+ }
+
+ @Override
+ public String toString() {
+ return reader.toString();
+ }
+ };
+ }
+
+ @Override
+ public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException {
+            // We do no type conversion here, since that will be done in the ValuesSourceReaderOperator for row-stride cases,
+            // using the BlockLoader.convert(Block) method defined above
+ return delegate.rowStrideReader(context);
+ }
+
+ @Override
+ public StoredFieldsSpec rowStrideStoredFieldSpec() {
+ return delegate.rowStrideStoredFieldSpec();
+ }
+
+ @Override
+ public boolean supportsOrdinals() {
+ return delegate.supportsOrdinals();
+ }
+
+ @Override
+ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException {
+ return delegate.ordinals(context);
+ }
+
+ @Override
+ public final String toString() {
+ return "TypeConvertingBlockLoader[delegate=" + delegate + ", convertEvaluator=" + convertEvaluator + "]";
+ }
+ }
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
index d3b2d5c6e7646..fc00f5be22624 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java
@@ -64,6 +64,7 @@
import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery;
import org.elasticsearch.xpack.esql.session.IndexResolver;
import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry;
+import org.elasticsearch.xpack.esql.type.MultiTypeEsField;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
@@ -188,6 +189,7 @@ public List<NamedWriteableRegistry.Entry> getNamedWriteables() {
entries.add(UnsupportedAttribute.ENTRY); // TODO combine with above once these are in the same project
entries.addAll(NamedExpression.getNamedWriteables());
entries.add(UnsupportedAttribute.NAMED_EXPRESSION_ENTRY); // TODO combine with above once these are in the same project
+ entries.add(MultiTypeEsField.ENTRY); // TODO combine with EsField.getNamedWriteables() once these are in the same module
return entries;
}
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
index 983a45f36169e..5fd7f0c230463 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
@@ -225,26 +225,10 @@ private EsField conflictingTypes(String name, String fullName, FieldCapabilities
if (type == UNSUPPORTED) {
return unsupported(name, fc);
}
- typesToIndices.computeIfAbsent(type.esType(), _key -> new TreeSet<>()).add(ir.getIndexName());
+ typesToIndices.computeIfAbsent(type.typeName(), _key -> new TreeSet<>()).add(ir.getIndexName());
}
}
- StringBuilder errorMessage = new StringBuilder();
- errorMessage.append("mapped as [");
- errorMessage.append(typesToIndices.size());
- errorMessage.append("] incompatible types: ");
- boolean first = true;
-        for (Map.Entry<String, Set<String>> e : typesToIndices.entrySet()) {
- if (first) {
- first = false;
- } else {
- errorMessage.append(", ");
- }
- errorMessage.append("[");
- errorMessage.append(e.getKey());
- errorMessage.append("] in ");
- errorMessage.append(e.getValue());
- }
- return new InvalidMappedField(name, errorMessage.toString());
+ return new InvalidMappedField(name, typesToIndices);
}
private EsField conflictingMetricTypes(String name, String fullName, FieldCapabilitiesResponse fieldCapsResponse) {
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField.java
new file mode 100644
index 0000000000000..2b963e7428e2b
--- /dev/null
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/MultiTypeEsField.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.type;
+
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.core.type.EsField;
+import org.elasticsearch.xpack.esql.core.type.InvalidMappedField;
+import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+
+/**
+ * During IndexResolution it could occur that the same field is mapped to different types in different indices.
+ * The class MultiTypeEsField.UnresolvedField holds that information and allows for later resolution of the field
+ * to a single type during LogicalPlanOptimization.
+ * If the plan contains conversion expressions for the different types, the resolution will be done using the conversion expressions,
+ * in which case a MultiTypeEsField will be created to encapsulate the type resolution capabilities.
+ * This class can be communicated to the data nodes and used during physical planning to influence field extraction so that
+ * type conversion is done at the data node level.
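+ * For example, if {@code client_ip} is mapped as {@code ip} in one index and {@code keyword} in another, and the
+ * query contains {@code TO_IP(client_ip)}, each index name is mapped to the conversion expression matching that
+ * index's mapped type, all of which produce the same {@code ip} result type.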
+ */
+public class MultiTypeEsField extends EsField {
+ public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
+ EsField.class,
+ "MultiTypeEsField",
+ MultiTypeEsField::new
+ );
+
+    private final Map<String, Expression> indexToConversionExpressions;
+
+    public MultiTypeEsField(String name, DataType dataType, boolean aggregatable, Map<String, Expression> indexToConversionExpressions) {
+ super(name, dataType, Map.of(), aggregatable);
+ this.indexToConversionExpressions = indexToConversionExpressions;
+ }
+
+ public MultiTypeEsField(StreamInput in) throws IOException {
+ // TODO: Change the conversion expression serialization to i.readNamedWriteable(Expression.class) once Expression is fully supported
+ this(in.readString(), DataType.readFrom(in), in.readBoolean(), in.readImmutableMap(i -> ((PlanStreamInput) i).readExpression()));
+ }
+
+ @Override
+ public void writeTo(StreamOutput out) throws IOException {
+ out.writeString(getName());
+ out.writeString(getDataType().typeName());
+ out.writeBoolean(isAggregatable());
+ out.writeMap(getIndexToConversionExpressions(), (o, v) -> out.writeNamedWriteable(v));
+ }
+
+ @Override
+ public String getWriteableName() {
+ return ENTRY.name;
+ }
+
+    public Map<String, Expression> getIndexToConversionExpressions() {
+ return indexToConversionExpressions;
+ }
+
+ public Expression getConversionExpressionForIndex(String indexName) {
+ return indexToConversionExpressions.get(indexName);
+ }
+
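+    /**
+     * Build a MultiTypeEsField from an InvalidMappedField by mapping each index to the conversion expression for
+     * that index's mapped type. All conversion expressions must produce the same data type, which becomes the
+     * resolved type of this field; a mismatch indicates a planning bug and raises an IllegalArgumentException.
+     */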
+ public static MultiTypeEsField resolveFrom(
+ InvalidMappedField invalidMappedField,
+        Map<String, Expression> typesToConversionExpressions
+ ) {
+        Map<String, Set<String>> typesToIndices = invalidMappedField.getTypesToIndices();
+        DataType resolvedDataType = DataType.UNSUPPORTED;
+        Map<String, Expression> indexToConversionExpressions = new HashMap<>();
+        for (String typeName : typesToIndices.keySet()) {
+            Set<String> indices = typesToIndices.get(typeName);
+ Expression convertExpr = typesToConversionExpressions.get(typeName);
+ if (resolvedDataType == DataType.UNSUPPORTED) {
+ resolvedDataType = convertExpr.dataType();
+ } else if (resolvedDataType != convertExpr.dataType()) {
+ throw new IllegalArgumentException("Resolved data type mismatch: " + resolvedDataType + " != " + convertExpr.dataType());
+ }
+ for (String indexName : indices) {
+ indexToConversionExpressions.put(indexName, convertExpr);
+ }
+ }
+ return new MultiTypeEsField(invalidMappedField.getName(), resolvedDataType, false, indexToConversionExpressions);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (super.equals(obj) == false) {
+ return false;
+ }
+ if (obj instanceof MultiTypeEsField other) {
+ return super.equals(other) && indexToConversionExpressions.equals(other.indexToConversionExpressions);
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(super.hashCode(), indexToConversionExpressions);
+ }
+
+ @Override
+ public String toString() {
+ return super.toString() + " (" + indexToConversionExpressions + ")";
+ }
+}
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
index 44466cebb7dac..27aa985efd6d0 100644
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java
@@ -222,6 +222,14 @@ public CsvTests(String fileName, String groupName, String testName, Integer line
public final void test() throws Throwable {
try {
assumeTrue("Test " + testName + " is not enabled", isEnabled(testName, Version.CURRENT));
+ /*
+ * The csv tests support all but a few features. The unsupported features
+ * are tested in integration tests.
+ */
+ assumeFalse("metadata fields aren't supported", testCase.requiredCapabilities.contains(cap(EsqlFeatures.METADATA_FIELDS)));
+ assumeFalse("enrich can't load fields in csv tests", testCase.requiredCapabilities.contains(cap(EsqlFeatures.ENRICH_LOAD)));
+ assumeFalse("can't load metrics in csv tests", testCase.requiredCapabilities.contains(cap(EsqlFeatures.METRICS_SYNTAX)));
+ assumeFalse("multiple indices aren't supported", testCase.requiredCapabilities.contains(EsqlCapabilities.UNION_TYPES));
if (Build.current().isSnapshot()) {
assertThat(
@@ -231,14 +239,6 @@ public final void test() throws Throwable {
);
}
- /*
- * The csv tests support all but a few features. The unsupported features
- * are tested in integration tests.
- */
- assumeFalse("metadata fields aren't supported", testCase.requiredCapabilities.contains(cap(EsqlFeatures.METADATA_FIELDS)));
- assumeFalse("enrich can't load fields in csv tests", testCase.requiredCapabilities.contains(cap(EsqlFeatures.ENRICH_LOAD)));
- assumeFalse("can't load metrics in csv tests", testCase.requiredCapabilities.contains(cap(EsqlFeatures.METRICS_SYNTAX)));
-
doTest();
} catch (Throwable th) {
throw reworkException(th);
@@ -334,7 +334,7 @@ private PhysicalPlan physicalPlan(LogicalPlan parsed, CsvTestsDataLoader.TestsDa
private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed) {
var preAnalysis = new PreAnalyzer().preAnalyze(parsed);
var indices = preAnalysis.indices;
- if (indices.size() == 0) {
+ if (indices.isEmpty()) {
/*
* If the data set doesn't matter we'll just grab one we know works.
* Employees is fine.
@@ -345,11 +345,23 @@ private static CsvTestsDataLoader.TestsDataset testsDataset(LogicalPlan parsed)
}
String indexName = indices.get(0).id().index();
- var dataset = CSV_DATASET_MAP.get(indexName);
- if (dataset == null) {
+        List<CsvTestsDataLoader.TestsDataset> datasets = new ArrayList<>();
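+        // Union-types tests query multiple indices through a wildcard pattern, e.g. FROM sample_data*, so collect all matching datasets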
+ if (indexName.endsWith("*")) {
+ String indexPrefix = indexName.substring(0, indexName.length() - 1);
+ for (var entry : CSV_DATASET_MAP.entrySet()) {
+ if (entry.getKey().startsWith(indexPrefix)) {
+ datasets.add(entry.getValue());
+ }
+ }
+ } else {
+ var dataset = CSV_DATASET_MAP.get(indexName);
+ datasets.add(dataset);
+ }
+ if (datasets.isEmpty()) {
throw new IllegalArgumentException("unknown CSV dataset for table [" + indexName + "]");
}
- return dataset;
+ // TODO: Support multiple datasets
+ return datasets.get(0);
}
private static TestPhysicalOperationProviders testOperationProviders(CsvTestsDataLoader.TestsDataset dataset) throws Exception {
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java
new file mode 100644
index 0000000000000..86baee58ca53f
--- /dev/null
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/MultiTypeEsFieldTests.java
@@ -0,0 +1,188 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.esql.type;
+
+import org.elasticsearch.TransportVersion;
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
+import org.elasticsearch.test.AbstractNamedWriteableTestCase;
+import org.elasticsearch.xpack.esql.core.expression.Attribute;
+import org.elasticsearch.xpack.esql.core.expression.Expression;
+import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
+import org.elasticsearch.xpack.esql.core.tree.Source;
+import org.elasticsearch.xpack.esql.core.type.DataType;
+import org.elasticsearch.xpack.esql.core.type.EsField;
+import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToBoolean;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianPoint;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianShape;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDatetime;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoPoint;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToGeoShape;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToIP;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToVersion;
+import org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry;
+import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
+import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput;
+import org.elasticsearch.xpack.esql.session.EsqlConfiguration;
+import org.elasticsearch.xpack.esql.session.EsqlConfigurationSerializationTests;
+import org.junit.Before;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.xpack.esql.type.EsqlDataTypes.isString;
+
+/**
+ * This test was originally based on the tests for sub-classes of EsField, like InvalidMappedFieldTests.
+ * However, it has a few important differences:
+ *
+ * - It is not in the esql.core module, but in the esql module, in order to have access to the sub-classes of AbstractConvertFunction,
+ * like ToString, which are important conversion Expressions used in the union-types feature.
+ * - It extends AbstractNamedWriteableTestCase instead of AbstractEsFieldTypeTests,
+ * in order to wrap the StreamInput with a PlanStreamInput, since Expression is not yet fully supported in the new
+ * serialization approach (NamedWriteable).
+ *
+ * These differences can be minimized once Expression is fully supported in the new serialization approach, and the esql and esql.core
+ * modules are merged, or at least the relevant classes are moved.
+ */
+public class MultiTypeEsFieldTests extends AbstractNamedWriteableTestCase<MultiTypeEsField> {
+
+ private EsqlConfiguration config;
+
+ @Before
+ public void initConfig() {
+ config = EsqlConfigurationSerializationTests.randomConfiguration();
+ }
+
+ @Override
+ protected MultiTypeEsField createTestInstance() {
+ String name = randomAlphaOfLength(4);
+ boolean toString = randomBoolean();
+ DataType dataType = randomFrom(types());
+ DataType toType = toString ? DataType.KEYWORD : dataType;
+        Map<String, Expression> indexToConvertExpressions = randomConvertExpressions(name, toString, dataType);
+ return new MultiTypeEsField(name, toType, false, indexToConvertExpressions);
+ }
+
+ @Override
+ protected MultiTypeEsField mutateInstance(MultiTypeEsField instance) throws IOException {
+ String name = instance.getName();
+ DataType dataType = instance.getDataType();
+        Map<String, Expression> indexToConvertExpressions = instance.getIndexToConversionExpressions();
+ switch (between(0, 2)) {
+ case 0 -> name = randomAlphaOfLength(name.length() + 1);
+ case 1 -> dataType = randomValueOtherThan(dataType, () -> randomFrom(DataType.types()));
+ case 2 -> indexToConvertExpressions = mutateConvertExpressions(name, dataType, indexToConvertExpressions);
+ default -> throw new IllegalArgumentException();
+ }
+ return new MultiTypeEsField(name, dataType, false, indexToConvertExpressions);
+ }
+
+ @Override
+ protected final NamedWriteableRegistry getNamedWriteableRegistry() {
+        List<NamedWriteableRegistry.Entry> entries = new ArrayList<>(UnaryScalarFunction.getNamedWriteables());
+ entries.addAll(Attribute.getNamedWriteables());
+ entries.addAll(EsField.getNamedWriteables());
+ entries.add(new NamedWriteableRegistry.Entry(MultiTypeEsField.class, "MultiTypeEsField", MultiTypeEsField::new));
+ return new NamedWriteableRegistry(entries);
+ }
+
+ @Override
+    protected final Class<MultiTypeEsField> categoryClass() {
+ return MultiTypeEsField.class;
+ }
+
+ @Override
+ protected final MultiTypeEsField copyInstance(MultiTypeEsField instance, TransportVersion version) throws IOException {
+ return copyInstance(
+ instance,
+ getNamedWriteableRegistry(),
+ (out, v) -> new PlanStreamOutput(out, new PlanNameRegistry(), config).writeNamedWriteable(v),
+ in -> {
+ PlanStreamInput pin = new PlanStreamInput(in, new PlanNameRegistry(), in.namedWriteableRegistry(), config);
+ return pin.readNamedWriteable(MultiTypeEsField.class);
+ },
+ version
+ );
+ }
+
+    private static Map<String, Expression> randomConvertExpressions(String name, boolean toString, DataType dataType) {
+        Map<String, Expression> indexToConvertExpressions = new HashMap<>();
+ if (toString) {
+ indexToConvertExpressions.put(randomAlphaOfLength(4), new ToString(Source.EMPTY, fieldAttribute(name, dataType)));
+ indexToConvertExpressions.put(randomAlphaOfLength(4), new ToString(Source.EMPTY, fieldAttribute(name, DataType.KEYWORD)));
+ } else {
+ indexToConvertExpressions.put(randomAlphaOfLength(4), testConvertExpression(name, DataType.KEYWORD, dataType));
+ indexToConvertExpressions.put(randomAlphaOfLength(4), testConvertExpression(name, dataType, dataType));
+ }
+ return indexToConvertExpressions;
+ }
+
+    private Map<String, Expression> mutateConvertExpressions(
+        String name,
+        DataType toType,
+        Map<String, Expression> indexToConvertExpressions
+ ) {
+ return randomValueOtherThan(
+ indexToConvertExpressions,
+ () -> randomConvertExpressions(name, toType == DataType.KEYWORD, randomFrom(types()))
+ );
+ }
+
+    private static List<DataType> types() {
+ return List.of(
+ DataType.BOOLEAN,
+ DataType.DATETIME,
+ DataType.DOUBLE,
+ DataType.FLOAT,
+ DataType.INTEGER,
+ DataType.IP,
+ DataType.KEYWORD,
+ DataType.LONG,
+ DataType.GEO_POINT,
+ DataType.GEO_SHAPE,
+ DataType.CARTESIAN_POINT,
+ DataType.CARTESIAN_SHAPE,
+ DataType.VERSION
+ );
+ }
+
+ private static Expression testConvertExpression(String name, DataType fromType, DataType toType) {
+ FieldAttribute fromField = fieldAttribute(name, fromType);
+ if (isString(toType)) {
+ return new ToString(Source.EMPTY, fromField);
+ } else {
+ return switch (toType) {
+ case BOOLEAN -> new ToBoolean(Source.EMPTY, fromField);
+ case DATETIME -> new ToDatetime(Source.EMPTY, fromField);
+ case DOUBLE, FLOAT -> new ToDouble(Source.EMPTY, fromField);
+ case INTEGER -> new ToInteger(Source.EMPTY, fromField);
+ case LONG -> new ToLong(Source.EMPTY, fromField);
+ case IP -> new ToIP(Source.EMPTY, fromField);
+ case KEYWORD -> new ToString(Source.EMPTY, fromField);
+ case GEO_POINT -> new ToGeoPoint(Source.EMPTY, fromField);
+ case GEO_SHAPE -> new ToGeoShape(Source.EMPTY, fromField);
+ case CARTESIAN_POINT -> new ToCartesianPoint(Source.EMPTY, fromField);
+ case CARTESIAN_SHAPE -> new ToCartesianShape(Source.EMPTY, fromField);
+ case VERSION -> new ToVersion(Source.EMPTY, fromField);
+ default -> throw new UnsupportedOperationException("Conversion from " + fromType + " to " + toType + " is not supported");
+ };
+ }
+ }
+
+ private static FieldAttribute fieldAttribute(String name, DataType dataType) {
+ return new FieldAttribute(Source.EMPTY, name, new EsField(name, dataType, Map.of(), true));
+ }
+}
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml
new file mode 100644
index 0000000000000..f3403ca8751c0
--- /dev/null
+++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/160_union_types.yml
@@ -0,0 +1,573 @@
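+# REST-level tests for union types: the same field mapped to different types in different indices.
+# The four indices below cover the combinations of client_ip mapped as ip or keyword, and
+# event_duration mapped as long or keyword.
+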
+setup:
+ - requires:
+ capabilities:
+ - method: POST
+ path: /_query
+ parameters: [method, path, parameters, capabilities]
+ capabilities: [union_types]
+ reason: "Union types introduced in 8.15.0"
+ test_runner_features: [capabilities, allowed_warnings_regex]
+
+ - do:
+ indices.create:
+ index: events_ip_long
+ body:
+ mappings:
+ properties:
+ "@timestamp":
+ type: date
+ client_ip:
+ type: ip
+ event_duration:
+ type: long
+ message:
+ type: keyword
+
+ - do:
+ bulk:
+ refresh: true
+ index: events_ip_long
+ body:
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:55:01.543Z", "client_ip": "172.21.3.15", "event_duration": 1756467, "message": "Connected to 10.1.0.1"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:53:55.832Z", "client_ip": "172.21.3.15", "event_duration": 5033755, "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:52:55.015Z", "client_ip": "172.21.3.15", "event_duration": 8268153, "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:51:54.732Z", "client_ip": "172.21.3.15", "event_duration": 725448, "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:33:34.937Z", "client_ip": "172.21.0.5", "event_duration": 1232382, "message": "Disconnected"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T12:27:28.948Z", "client_ip": "172.21.2.113", "event_duration": 2764889, "message": "Connected to 10.1.0.2"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", "event_duration": 3450233, "message": "Connected to 10.1.0.3"}'
+ - do:
+ indices.create:
+ index: events_keyword_long
+ body:
+ mappings:
+ properties:
+ "@timestamp":
+ type: date
+ client_ip:
+ type: keyword
+ event_duration:
+ type: long
+ message:
+ type: keyword
+
+ - do:
+ bulk:
+ refresh: true
+ index: events_keyword_long
+ body:
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:55:01.543Z", "client_ip": "172.21.3.15", "event_duration": 1756467, "message": "Connected to 10.1.0.1"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:53:55.832Z", "client_ip": "172.21.3.15", "event_duration": 5033755, "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:52:55.015Z", "client_ip": "172.21.3.15", "event_duration": 8268153, "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:51:54.732Z", "client_ip": "172.21.3.15", "event_duration": 725448, "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:33:34.937Z", "client_ip": "172.21.0.5", "event_duration": 1232382, "message": "Disconnected"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T12:27:28.948Z", "client_ip": "172.21.2.113", "event_duration": 2764889, "message": "Connected to 10.1.0.2"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", "event_duration": 3450233, "message": "Connected to 10.1.0.3"}'
+
+ - do:
+ indices.create:
+ index: events_ip_keyword
+ body:
+ mappings:
+ properties:
+ "@timestamp":
+ type: date
+ client_ip:
+ type: ip
+ event_duration:
+ type: keyword
+ message:
+ type: keyword
+
+ - do:
+ bulk:
+ refresh: true
+ index: events_ip_keyword
+ body:
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:55:01.543Z", "client_ip": "172.21.3.15", "event_duration": "1756467", "message": "Connected to 10.1.0.1"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:53:55.832Z", "client_ip": "172.21.3.15", "event_duration": "5033755", "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:52:55.015Z", "client_ip": "172.21.3.15", "event_duration": "8268153", "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:51:54.732Z", "client_ip": "172.21.3.15", "event_duration": "725448", "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:33:34.937Z", "client_ip": "172.21.0.5", "event_duration": "1232382", "message": "Disconnected"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T12:27:28.948Z", "client_ip": "172.21.2.113", "event_duration": "2764889", "message": "Connected to 10.1.0.2"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", "event_duration": "3450233", "message": "Connected to 10.1.0.3"}'
+
+ - do:
+ indices.create:
+ index: events_keyword_keyword
+ body:
+ mappings:
+ properties:
+ "@timestamp":
+ type: date
+ client_ip:
+ type: keyword
+ event_duration:
+ type: keyword
+ message:
+ type: keyword
+
+ - do:
+ bulk:
+ refresh: true
+ index: events_keyword_keyword
+ body:
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:55:01.543Z", "client_ip": "172.21.3.15", "event_duration": "1756467", "message": "Connected to 10.1.0.1"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:53:55.832Z", "client_ip": "172.21.3.15", "event_duration": "5033755", "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:52:55.015Z", "client_ip": "172.21.3.15", "event_duration": "8268153", "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:51:54.732Z", "client_ip": "172.21.3.15", "event_duration": "725448", "message": "Connection error"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T13:33:34.937Z", "client_ip": "172.21.0.5", "event_duration": "1232382", "message": "Disconnected"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T12:27:28.948Z", "client_ip": "172.21.2.113", "event_duration": "2764889", "message": "Connected to 10.1.0.2"}'
+ - '{"index": {}}'
+ - '{"@timestamp": "2023-10-23T12:15:03.360Z", "client_ip": "172.21.2.162", "event_duration": "3450233", "message": "Connected to 10.1.0.3"}'
+
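+############################################################################################################
+# The four indices above hold the same seven events under a 2x2 matrix of mappings: client_ip is
+# mapped as 'ip' (events_ip_*) or 'keyword' (events_keyword_*), and event_duration as 'long'
+# (events_*_long) or 'keyword' (events_*_keyword), so both fields are union types across them.
+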
+############################################################################################################
+# Test a single index as a control of the expected results
+
+---
+load single index ip_long:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_ip_long METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+ - match: { columns.0.name: "_index" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "@timestamp" }
+ - match: { columns.1.type: "date" }
+ - match: { columns.2.name: "client_ip" }
+ - match: { columns.2.type: "ip" }
+ - match: { columns.3.name: "event_duration" }
+ - match: { columns.3.type: "long" }
+ - match: { columns.4.name: "message" }
+ - match: { columns.4.type: "keyword" }
+ - length: { values: 7 }
+ - match: { values.0.0: "events_ip_long" }
+ - match: { values.0.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.2: "172.21.3.15" }
+ - match: { values.0.3: 1756467 }
+ - match: { values.0.4: "Connected to 10.1.0.1" }
+
+---
+load single index keyword_keyword:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_keyword_keyword METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+ - match: { columns.0.name: "_index" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "@timestamp" }
+ - match: { columns.1.type: "date" }
+ - match: { columns.2.name: "client_ip" }
+ - match: { columns.2.type: "keyword" }
+ - match: { columns.3.name: "event_duration" }
+ - match: { columns.3.type: "keyword" }
+ - match: { columns.4.name: "message" }
+ - match: { columns.4.type: "keyword" }
+ - length: { values: 7 }
+ - match: { values.0.0: "events_keyword_keyword" }
+ - match: { values.0.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.2: "172.21.3.15" }
+ - match: { values.0.3: "1756467" }
+ - match: { values.0.4: "Connected to 10.1.0.1" }
+
+############################################################################################################
+# Test two indices where the event_duration is mapped as a LONG and as a KEYWORD
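+# Without an explicit conversion the conflicting field is reported as type 'unsupported' and its
+# values load as null; an EVAL with TO_LONG on the field makes it usable as 'long' in both indices.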
+
+---
+load two indices, showing unsupported type and null value for event_duration:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_ip_* METADATA _index | SORT _index ASC, @timestamp DESC'
+
+ - match: { columns.0.name: "@timestamp" }
+ - match: { columns.0.type: "date" }
+ - match: { columns.1.name: "client_ip" }
+ - match: { columns.1.type: "ip" }
+ - match: { columns.2.name: "event_duration" }
+ - match: { columns.2.type: "unsupported" }
+ - match: { columns.3.name: "message" }
+ - match: { columns.3.type: "keyword" }
+ - match: { columns.4.name: "_index" }
+ - match: { columns.4.type: "keyword" }
+ - length: { values: 14 }
+ - match: { values.0.0: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.1: "172.21.3.15" }
+ - match: { values.0.2: null }
+ - match: { values.0.3: "Connected to 10.1.0.1" }
+ - match: { values.0.4: "events_ip_keyword" }
+ - match: { values.7.0: "2023-10-23T13:55:01.543Z" }
+ - match: { values.7.1: "172.21.3.15" }
+ - match: { values.7.2: null }
+ - match: { values.7.3: "Connected to 10.1.0.1" }
+ - match: { values.7.4: "events_ip_long" }
+
+---
+load two indices with no conversion function, but needs TO_LONG conversion:
+ - do:
+ catch: '/Cannot use field \[event_duration\] due to ambiguities being mapped as \[2\] incompatible types: \[keyword\] in \[events_ip_keyword\], \[long\] in \[events_ip_long\]/'
+ esql.query:
+ body:
+ query: 'FROM events_ip_* METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+---
+load two indices with incorrect conversion function, TO_IP instead of TO_LONG:
+ - do:
+ catch: '/Cannot use field \[event_duration\] due to ambiguities being mapped as \[2\] incompatible types: \[keyword\] in \[events_ip_keyword\], \[long\] in \[events_ip_long\]/'
+ esql.query:
+ body:
+ query: 'FROM events_ip_* METADATA _index | EVAL event_duration = TO_IP(event_duration) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+---
+load two indices with single conversion function TO_LONG:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_ip_* METADATA _index | EVAL event_duration = TO_LONG(event_duration) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+ - match: { columns.0.name: "_index" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "@timestamp" }
+ - match: { columns.1.type: "date" }
+ - match: { columns.2.name: "client_ip" }
+ - match: { columns.2.type: "ip" }
+ - match: { columns.3.name: "event_duration" }
+ - match: { columns.3.type: "long" }
+ - match: { columns.4.name: "message" }
+ - match: { columns.4.type: "keyword" }
+ - length: { values: 14 }
+ - match: { values.0.0: "events_ip_keyword" }
+ - match: { values.0.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.2: "172.21.3.15" }
+ - match: { values.0.3: 1756467 }
+ - match: { values.0.4: "Connected to 10.1.0.1" }
+ - match: { values.7.0: "events_ip_long" }
+ - match: { values.7.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.7.2: "172.21.3.15" }
+ - match: { values.7.3: 1756467 }
+ - match: { values.7.4: "Connected to 10.1.0.1" }
+
+---
+load two indices and drop ambiguous field event_duration:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_ip_* | DROP event_duration'
+
+ - length: { values: 14 }
+
+---
+load two indices, convert and then drop ambiguous field event_duration:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_ip_* | EVAL event_duration = TO_LONG(event_duration) | DROP event_duration'
+
+ - length: { values: 14 }
+
+---
+load two indices, convert, rename and then drop ambiguous field event_duration:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_ip_* | EVAL x = TO_LONG(event_duration) | DROP event_duration'
+
+ - length: { values: 14 }
+
+---
+# This test asserts the current behavior: the original (unconverted) field name still resolves to
+# type 'unsupported' and produces null values.
+load two indices, convert, rename but not drop ambiguous field event_duration:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_ip_* | EVAL x = TO_LONG(event_duration), y = TO_STRING(event_duration), z = TO_LONG(event_duration) | SORT @timestamp DESC'
+
+ - match: { columns.0.name: "@timestamp" }
+ - match: { columns.0.type: "date" }
+ - match: { columns.1.name: "client_ip" }
+ - match: { columns.1.type: "ip" }
+ - match: { columns.2.name: "event_duration" }
+ - match: { columns.2.type: "unsupported" }
+ - match: { columns.3.name: "message" }
+ - match: { columns.3.type: "keyword" }
+ - match: { columns.4.name: "x" }
+ - match: { columns.4.type: "long" }
+ - match: { columns.5.name: "y" }
+ - match: { columns.5.type: "keyword" }
+ - match: { columns.6.name: "z" }
+ - match: { columns.6.type: "long" }
+ - length: { values: 14 }
+ - match: { values.0.0: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.1: "172.21.3.15" }
+ - match: { values.0.2: null }
+ - match: { values.0.3: "Connected to 10.1.0.1" }
+ - match: { values.0.4: 1756467 }
+ - match: { values.0.5: "1756467" }
+ - match: { values.0.6: 1756467 }
+ - match: { values.1.0: "2023-10-23T13:55:01.543Z" }
+ - match: { values.1.1: "172.21.3.15" }
+ - match: { values.1.2: null }
+ - match: { values.1.3: "Connected to 10.1.0.1" }
+ - match: { values.1.4: 1756467 }
+ - match: { values.1.5: "1756467" }
+ - match: { values.1.6: 1756467 }
+
+############################################################################################################
+# Test two indices where the IP address is mapped as an IP and as a KEYWORD
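+# Same pattern as above: client_ip is 'unsupported' until converted, TO_IP is the conversion that
+# resolves it, and an incorrect conversion (TO_LONG) fails with the same ambiguity error.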
+
+---
+load two indices, showing unsupported type and null value for client_ip:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_*_long METADATA _index | SORT _index ASC, @timestamp DESC'
+
+ - match: { columns.0.name: "@timestamp" }
+ - match: { columns.0.type: "date" }
+ - match: { columns.1.name: "client_ip" }
+ - match: { columns.1.type: "unsupported" }
+ - match: { columns.2.name: "event_duration" }
+ - match: { columns.2.type: "long" }
+ - match: { columns.3.name: "message" }
+ - match: { columns.3.type: "keyword" }
+ - match: { columns.4.name: "_index" }
+ - match: { columns.4.type: "keyword" }
+ - length: { values: 14 }
+ - match: { values.0.0: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.1: null }
+ - match: { values.0.2: 1756467 }
+ - match: { values.0.3: "Connected to 10.1.0.1" }
+ - match: { values.0.4: "events_ip_long" }
+ - match: { values.7.0: "2023-10-23T13:55:01.543Z" }
+ - match: { values.7.1: null }
+ - match: { values.7.2: 1756467 }
+ - match: { values.7.3: "Connected to 10.1.0.1" }
+ - match: { values.7.4: "events_keyword_long" }
+
+---
+load two indices with no conversion function, but needs TO_IP conversion:
+ - do:
+ catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in \[events_ip_long\], \[keyword\] in \[events_keyword_long\]/'
+ esql.query:
+ body:
+ query: 'FROM events_*_long METADATA _index | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+---
+load two indices with incorrect conversion function, TO_LONG instead of TO_IP:
+ - do:
+ catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in \[events_ip_long\], \[keyword\] in \[events_keyword_long\]/'
+ esql.query:
+ body:
+ query: 'FROM events_*_long METADATA _index | EVAL client_ip = TO_LONG(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+---
+load two indices with single conversion function TO_IP:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_*_long METADATA _index | EVAL client_ip = TO_IP(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+ - match: { columns.0.name: "_index" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "@timestamp" }
+ - match: { columns.1.type: "date" }
+ - match: { columns.2.name: "client_ip" }
+ - match: { columns.2.type: "ip" }
+ - match: { columns.3.name: "event_duration" }
+ - match: { columns.3.type: "long" }
+ - match: { columns.4.name: "message" }
+ - match: { columns.4.type: "keyword" }
+ - length: { values: 14 }
+ - match: { values.0.0: "events_ip_long" }
+ - match: { values.0.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.2: "172.21.3.15" }
+ - match: { values.0.3: 1756467 }
+ - match: { values.0.4: "Connected to 10.1.0.1" }
+ - match: { values.7.0: "events_keyword_long" }
+ - match: { values.7.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.7.2: "172.21.3.15" }
+ - match: { values.7.3: 1756467 }
+ - match: { values.7.4: "Connected to 10.1.0.1" }
+
+---
+load two indices and drop ambiguous field client_ip:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_*_long | DROP client_ip'
+
+ - length: { values: 14 }
+
+---
+load two indices, convert and then drop ambiguous field client_ip:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_*_long | EVAL client_ip = TO_IP(client_ip) | DROP client_ip'
+
+ - length: { values: 14 }
+
+---
+load two indices, convert, rename and then drop ambiguous field client_ip:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_*_long | EVAL x = TO_IP(client_ip) | DROP client_ip'
+
+ - length: { values: 14 }
+
+---
+# This test asserts the current behavior: the original (unconverted) field name still resolves to
+# type 'unsupported' and produces null values.
+load two indices, convert, rename but not drop ambiguous field client_ip:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_*_long | EVAL x = TO_IP(client_ip), y = TO_STRING(client_ip), z = TO_IP(client_ip) | SORT @timestamp DESC'
+
+ - match: { columns.0.name: "@timestamp" }
+ - match: { columns.0.type: "date" }
+ - match: { columns.1.name: "client_ip" }
+ - match: { columns.1.type: "unsupported" }
+ - match: { columns.2.name: "event_duration" }
+ - match: { columns.2.type: "long" }
+ - match: { columns.3.name: "message" }
+ - match: { columns.3.type: "keyword" }
+ - match: { columns.4.name: "x" }
+ - match: { columns.4.type: "ip" }
+ - match: { columns.5.name: "y" }
+ - match: { columns.5.type: "keyword" }
+ - match: { columns.6.name: "z" }
+ - match: { columns.6.type: "ip" }
+ - length: { values: 14 }
+ - match: { values.0.0: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.1: null }
+ - match: { values.0.2: 1756467 }
+ - match: { values.0.3: "Connected to 10.1.0.1" }
+ - match: { values.0.4: "172.21.3.15" }
+ - match: { values.0.5: "172.21.3.15" }
+ - match: { values.0.6: "172.21.3.15" }
+ - match: { values.1.0: "2023-10-23T13:55:01.543Z" }
+ - match: { values.1.1: null }
+ - match: { values.1.2: 1756467 }
+ - match: { values.1.3: "Connected to 10.1.0.1" }
+ - match: { values.1.4: "172.21.3.15" }
+ - match: { values.1.5: "172.21.3.15" }
+ - match: { values.1.6: "172.21.3.15" }
+
+############################################################################################################
+# Test four indices with both the client_ip (IP and KEYWORD) and event_duration (LONG and KEYWORD) mappings
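+# Converting only one of the two conflicting fields still fails on the other; both conversions, as
+# in 'EVAL event_duration = TO_LONG(event_duration), client_ip = TO_IP(client_ip)', are required.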
+
+---
+load four indices with single conversion function TO_LONG:
+ - do:
+ catch: '/Cannot use field \[client_ip\] due to ambiguities being mapped as \[2\] incompatible types: \[ip\] in \[events_ip_keyword, events_ip_long\], \[keyword\] in \[events_keyword_keyword, events_keyword_long\]/'
+ esql.query:
+ body:
+ query: 'FROM events_* METADATA _index | EVAL event_duration = TO_LONG(event_duration) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+---
+load four indices with single conversion function TO_IP:
+ - do:
+ catch: '/Cannot use field \[event_duration\] due to ambiguities being mapped as \[2\] incompatible types: \[keyword\] in \[events_ip_keyword, events_keyword_keyword\], \[long\] in \[events_ip_long, events_keyword_long\]/'
+ esql.query:
+ body:
+ query: 'FROM events_* METADATA _index | EVAL client_ip = TO_IP(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+---
+load four indices with multiple conversion functions TO_LONG and TO_IP:
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM events_* METADATA _index | EVAL event_duration = TO_LONG(event_duration), client_ip = TO_IP(client_ip) | KEEP _index, @timestamp, client_ip, event_duration, message | SORT _index ASC, @timestamp DESC'
+
+ - match: { columns.0.name: "_index" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "@timestamp" }
+ - match: { columns.1.type: "date" }
+ - match: { columns.2.name: "client_ip" }
+ - match: { columns.2.type: "ip" }
+ - match: { columns.3.name: "event_duration" }
+ - match: { columns.3.type: "long" }
+ - match: { columns.4.name: "message" }
+ - match: { columns.4.type: "keyword" }
+ - length: { values: 28 }
+ - match: { values.0.0: "events_ip_keyword" }
+ - match: { values.0.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.0.2: "172.21.3.15" }
+ - match: { values.0.3: 1756467 }
+ - match: { values.0.4: "Connected to 10.1.0.1" }
+ - match: { values.7.0: "events_ip_long" }
+ - match: { values.7.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.7.2: "172.21.3.15" }
+ - match: { values.7.3: 1756467 }
+ - match: { values.7.4: "Connected to 10.1.0.1" }
+ - match: { values.14.0: "events_keyword_keyword" }
+ - match: { values.14.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.14.2: "172.21.3.15" }
+ - match: { values.14.3: 1756467 }
+ - match: { values.14.4: "Connected to 10.1.0.1" }
+ - match: { values.21.0: "events_keyword_long" }
+ - match: { values.21.1: "2023-10-23T13:55:01.543Z" }
+ - match: { values.21.2: "172.21.3.15" }
+ - match: { values.21.3: 1756467 }
+ - match: { values.21.4: "Connected to 10.1.0.1" }
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml
new file mode 100644
index 0000000000000..99bd1d6508895
--- /dev/null
+++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/161_union_types_subfields.yml
@@ -0,0 +1,203 @@
+setup:
+ - requires:
+ capabilities:
+ - method: POST
+ path: /_query
+ parameters: [ method, path, parameters, capabilities ]
+ capabilities: [ union_types ]
+ reason: "Union types introduced in 8.15.0"
+ test_runner_features: [ capabilities, allowed_warnings_regex ]
+
+ - do:
+ indices.create:
+ index: test1
+ body:
+ mappings:
+ properties:
+ obj:
+ properties:
+ keyword:
+ type: keyword
+ integer:
+ type: integer
+ keyword:
+ type: boolean
+ integer:
+ type: version
+
+ - do:
+ indices.create:
+ index: test2
+ body:
+ mappings:
+ properties:
+ obj:
+ properties:
+ keyword:
+ type: boolean
+ integer:
+ type: version
+ keyword:
+ type: keyword
+ integer:
+ type: integer
+
+ - do:
+ bulk:
+ refresh: true
+ index: test1
+ body:
+ - '{ "index": {"_id": 11} }'
+ - '{ "obj.keyword": "true", "obj.integer": 100, "keyword": "true", "integer": "50" }'
+ - '{ "index": {"_id": 12} }'
+ - '{ "obj.keyword": "US", "obj.integer": 20, "keyword": false, "integer": "1.2.3" }'
+
+ - do:
+ bulk:
+ refresh: true
+ index: test2
+ body:
+ - '{ "index": {"_id": 21} }'
+ - '{ "obj.keyword": "true", "obj.integer": "50", "keyword": "true", "integer": 100 }'
+ - '{ "index": {"_id": 22} }'
+ - '{ "obj.keyword": false, "obj.integer": "1.2.3", "keyword": "US", "integer": 20 }'
+
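+# test1 and test2 swap their mappings, both under 'obj' and at the top level: 'keyword' <-> 'boolean'
+# and 'integer' <-> 'version', so all four fields are union types across test*.
+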
+---
+"load single index":
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM test1 METADATA _id | KEEP _id, obj.integer, obj.keyword | SORT _id ASC'
+
+ - match: { columns.0.name: "_id" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "obj.integer" }
+ - match: { columns.1.type: "integer" }
+ - match: { columns.2.name: "obj.keyword" }
+ - match: { columns.2.type: "keyword" }
+ - length: { values: 2 }
+ - match: { values.0.0: "11" }
+ - match: { values.0.1: 100 }
+ - match: { values.0.2: "true" }
+ - match: { values.1.0: "12" }
+ - match: { values.1.1: 20 }
+ - match: { values.1.2: "US" }
+
+---
+"load two indices with to_string":
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM test* METADATA _id | EVAL s = TO_STRING(obj.keyword) | KEEP _id, s | SORT _id ASC'
+
+ - match: { columns.0.name: "_id" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "s" }
+ - match: { columns.1.type: "keyword" }
+ - length: { values: 4 }
+ - match: { values.0.0: "11" }
+ - match: { values.0.1: "true" }
+ - match: { values.1.0: "12" }
+ - match: { values.1.1: "US" }
+ - match: { values.2.0: "21" }
+ - match: { values.2.1: "true" }
+ - match: { values.3.0: "22" }
+ - match: { values.3.1: "false" }
+
+---
+"load two indices with to_version":
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM test* METADATA _id | EVAL v = TO_VERSION(TO_STRING(obj.integer)) | KEEP _id, v | SORT _id ASC'
+
+ - match: { columns.0.name: "_id" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "v" }
+ - match: { columns.1.type: "version" }
+ - length: { values: 4 }
+ - match: { values.0.0: "11" }
+ - match: { values.0.1: "100" }
+ - match: { values.1.0: "12" }
+ - match: { values.1.1: "20" }
+ - match: { values.2.0: "21" }
+ - match: { values.2.1: "50" }
+ - match: { values.3.0: "22" }
+ - match: { values.3.1: "1.2.3" }
+
+---
+"load two indices with to_version and to_string":
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM test* METADATA _id | EVAL v = TO_VERSION(TO_STRING(obj.integer)), s = TO_STRING(obj.keyword) | KEEP _id, v, s | SORT _id ASC'
+
+ - match: { columns.0.name: "_id" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "v" }
+ - match: { columns.1.type: "version" }
+ - match: { columns.2.name: "s" }
+ - match: { columns.2.type: "keyword" }
+ - length: { values: 4 }
+ - match: { values.0.0: "11" }
+ - match: { values.0.1: "100" }
+ - match: { values.0.2: "true" }
+ - match: { values.1.0: "12" }
+ - match: { values.1.1: "20" }
+ - match: { values.1.2: "US" }
+ - match: { values.2.0: "21" }
+ - match: { values.2.1: "50" }
+ - match: { values.2.2: "true" }
+ - match: { values.3.0: "22" }
+ - match: { values.3.1: "1.2.3" }
+ - match: { values.3.2: "false" }
+
+---
+"load two indices with to_version and to_string nested and un-nested":
+ - do:
+ allowed_warnings_regex:
+ - "No limit defined, adding default limit of \\[.*\\]"
+ esql.query:
+ body:
+ query: 'FROM test* METADATA _id | EVAL nv = TO_VERSION(TO_STRING(obj.integer)), uv = TO_VERSION(TO_STRING(integer)), ns = TO_STRING(obj.keyword), us = TO_STRING(keyword) | KEEP _id, nv, uv, ns, us | SORT _id ASC'
+
+ - match: { columns.0.name: "_id" }
+ - match: { columns.0.type: "keyword" }
+ - match: { columns.1.name: "nv" }
+ - match: { columns.1.type: "version" }
+ - match: { columns.2.name: "uv" }
+ - match: { columns.2.type: "version" }
+ - match: { columns.3.name: "ns" }
+ - match: { columns.3.type: "keyword" }
+ - match: { columns.4.name: "us" }
+ - match: { columns.4.type: "keyword" }
+ - length: { values: 4 }
+ - match: { values.0.0: "11" }
+ - match: { values.0.1: "100" }
+ - match: { values.0.2: "50" }
+ - match: { values.0.3: "true" }
+ - match: { values.0.4: "true" }
+ - match: { values.1.0: "12" }
+ - match: { values.1.1: "20" }
+ - match: { values.1.2: "1.2.3" }
+ - match: { values.1.3: "US" }
+ - match: { values.1.4: "false" }
+ - match: { values.2.0: "21" }
+ - match: { values.2.1: "50" }
+ - match: { values.2.2: "100" }
+ - match: { values.2.3: "true" }
+ - match: { values.2.4: "true" }
+ - match: { values.3.0: "22" }
+ - match: { values.3.1: "1.2.3" }
+ - match: { values.3.2: "20" }
+ - match: { values.3.3: "false" }
+ - match: { values.3.4: "US" }