From d893856cd4bf959e6243355d5600810cdf4231c0 Mon Sep 17 00:00:00 2001 From: ncordon Date: Tue, 3 Mar 2026 11:14:52 +0100 Subject: [PATCH 01/14] =?UTF-8?q?[ESQL]=C2=A0Adds=20wriring=20for=20comput?= =?UTF-8?q?e=20side=20of=20LIMIT=20BY=20command?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../definitions/referable/esql_limit_by.csv | 1 + .../resources/transport/upper_bounds/9.4.csv | 2 +- .../operator/GroupedLimitOperator.java | 289 ++++++++++++++++++ .../compute/operator/PositionKeyEncoder.java | 131 ++++++++ .../operator/GroupedLimitOperatorTests.java | 265 ++++++++++++++++ .../xpack/esql/plan/logical/Limit.java | 54 +++- .../xpack/esql/plan/physical/LimitExec.java | 34 ++- .../esql/planner/LocalExecutionPlanner.java | 25 +- .../esql/planner/mapper/LocalMapper.java | 2 +- .../xpack/esql/planner/mapper/Mapper.java | 2 +- .../xpack/esql/plugin/EsqlPlugin.java | 2 + 11 files changed, 789 insertions(+), 18 deletions(-) create mode 100644 server/src/main/resources/transport/definitions/referable/esql_limit_by.csv create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/PositionKeyEncoder.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java diff --git a/server/src/main/resources/transport/definitions/referable/esql_limit_by.csv b/server/src/main/resources/transport/definitions/referable/esql_limit_by.csv new file mode 100644 index 0000000000000..c988fdc9fe987 --- /dev/null +++ b/server/src/main/resources/transport/definitions/referable/esql_limit_by.csv @@ -0,0 +1 @@ +9304000 diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv index c60446d500473..5a21cc3ebd006 100644 --- a/server/src/main/resources/transport/upper_bounds/9.4.csv +++ b/server/src/main/resources/transport/upper_bounds/9.4.csv @@ -1 +1 @@ -query_dsl_boxplot_exponential_histogram_support,9303000 +esql_limit_by,9304000 diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java new file mode 100644 index 0000000000000..23244dd50da85 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -0,0 +1,289 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.TransportVersion; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.BytesRefHashTable; +import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.compute.aggregation.blockhash.HashImplFactory; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +/** + * An operator that limits the number of rows emitted per group. + * For {@code LIMIT N BY x}, this accepts up to N rows for each distinct value of x. + * Group keys use list semantics for multivalues: {@code [1,2]} and {@code [2,1]} are different groups. + */ +public class GroupedLimitOperator implements Operator { + + private final int limit; + private final PositionKeyEncoder keyEncoder; + private final BigArrays bigArrays; + private final BytesRefHashTable seenKeys; + private IntArray counts; + + private int pagesProcessed; + private long rowsReceived; + private long rowsEmitted; + + private Page lastOutput; + private boolean finished; + + public GroupedLimitOperator(int limitPerGroup, PositionKeyEncoder keyEncoder, BlockFactory blockFactory) { + this.limit = limitPerGroup; + this.keyEncoder = keyEncoder; + this.bigArrays = blockFactory.bigArrays(); + this.seenKeys = HashImplFactory.newBytesRefHash(blockFactory); + this.counts = bigArrays.newIntArray(16, false); + } + + public static final class Factory implements Operator.OperatorFactory { + private final int limit; + private final int[] groupChannels; + private final List elementTypes; + + public Factory(int limit, List groupChannels, List elementTypes) { + this.limit = limit; + this.groupChannels = groupChannels.stream().mapToInt(Integer::intValue).toArray(); + this.elementTypes = elementTypes; + } + + @Override + public GroupedLimitOperator get(DriverContext driverContext) { + return new GroupedLimitOperator(limit, new PositionKeyEncoder(groupChannels, elementTypes), driverContext.blockFactory()); + } + + @Override + public String describe() { + return "GroupedLimitOperator[limit = " + limit + "]"; + } + } + + @Override + public boolean needsInput() { + return finished == false && lastOutput == null; + } + + @Override + public void addInput(Page page) { + assert lastOutput == null : "has pending output page"; + int positionCount = page.getPositionCount(); + rowsReceived += positionCount; + + int acceptedCount = 0; + int[] accepted = new int[positionCount]; + + try { + for (int pos = 0; pos < positionCount; pos++) { + BytesRef key = keyEncoder.encode(page, pos); + long hashOrd = seenKeys.add(key); + int count = 0; + long ord; + if (hashOrd >= 0) { + ord = hashOrd; + counts = bigArrays.grow(counts, ord + 1); + } else { + ord = -(hashOrd + 1); + count = counts.get(ord); + } + if (count < limit) { + counts.set(ord, count + 1); + accepted[acceptedCount++] = pos; + } + } + } catch (Exception e) { + page.releaseBlocks(); + throw e; + } + + if (acceptedCount == 0) { + page.releaseBlocks(); + return; + } + + /* + * When all rows in a page are accepted the operator returns the + * original page instance rather than a filtered copy. + */ + if (acceptedCount == positionCount) { + lastOutput = page; + } else { + int[] positions = new int[acceptedCount]; + System.arraycopy(accepted, 0, positions, 0, acceptedCount); + lastOutput = filterPage(page, positions); + } + } + + private static Page filterPage(Page page, int[] positions) { + Block[] blocks = new Block[page.getBlockCount()]; + Page result = null; + try { + for (int b = 0; b < blocks.length; b++) { + blocks[b] = page.getBlock(b).filter(false, positions); + } + result = new Page(blocks); + } finally { + if (result == null) { + Releasables.closeExpectNoException(page::releaseBlocks, Releasables.wrap(blocks)); + } else { + page.releaseBlocks(); + } + } + return result; + } + + @Override + public void finish() { + finished = true; + } + + @Override + public boolean isFinished() { + return lastOutput == null && finished; + } + + @Override + public boolean canProduceMoreDataWithoutExtraInput() { + return lastOutput != null; + } + + @Override + public Page getOutput() { + if (lastOutput == null) { + return null; + } + Page result = lastOutput; + lastOutput = null; + pagesProcessed++; + rowsEmitted += result.getPositionCount(); + return result; + } + + @Override + public Status status() { + return new Status(limit, (int) seenKeys.size(), pagesProcessed, rowsReceived, rowsEmitted); + } + + @Override + public void close() { + Releasables.closeExpectNoException(lastOutput == null ? () -> {} : lastOutput::releaseBlocks, seenKeys, counts); + } + + @Override + public String toString() { + return "GroupedLimitOperator[limit = " + limit + ", groups = " + seenKeys.size() + "]"; + } + + public static class Status implements Operator.Status { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Operator.Status.class, + "grouped_limit", + Status::new + ); + + private final int limitPerGroup; + private final int groupCount; + private final int pagesProcessed; + private final long rowsReceived; + private final long rowsEmitted; + + protected Status(int limitPerGroup, int groupCount, int pagesProcessed, long rowsReceived, long rowsEmitted) { + this.limitPerGroup = limitPerGroup; + this.groupCount = groupCount; + this.pagesProcessed = pagesProcessed; + this.rowsReceived = rowsReceived; + this.rowsEmitted = rowsEmitted; + } + + protected Status(StreamInput in) throws IOException { + limitPerGroup = in.readVInt(); + groupCount = in.readVInt(); + pagesProcessed = in.readVInt(); + rowsReceived = in.readVLong(); + rowsEmitted = in.readVLong(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(limitPerGroup); + out.writeVInt(groupCount); + out.writeVInt(pagesProcessed); + out.writeVLong(rowsReceived); + out.writeVLong(rowsEmitted); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + public int pagesProcessed() { + return pagesProcessed; + } + + public long rowsReceived() { + return rowsReceived; + } + + public long rowsEmitted() { + return rowsEmitted; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("limit_per_group", limitPerGroup); + builder.field("group_count", groupCount); + builder.field("pages_processed", pagesProcessed); + builder.field("rows_received", rowsReceived); + builder.field("rows_emitted", rowsEmitted); + return builder.endObject(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Status status = (Status) o; + return limitPerGroup == status.limitPerGroup + && groupCount == status.groupCount + && pagesProcessed == status.pagesProcessed + && rowsReceived == status.rowsReceived + && rowsEmitted == status.rowsEmitted; + } + + @Override + public int hashCode() { + return Objects.hash(limitPerGroup, groupCount, pagesProcessed, rowsReceived, rowsEmitted); + } + + @Override + public String toString() { + return Strings.toString(this); + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersion.minimumCompatible(); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/PositionKeyEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/PositionKeyEncoder.java new file mode 100644 index 0000000000000..3bb89441e7103 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/PositionKeyEncoder.java @@ -0,0 +1,131 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.Page; + +import java.util.List; + +/** + * Encodes the values at a given position across multiple blocks into a single {@link BytesRef} composite key. + * Multivalued positions are serialized with list semantics: the value count is written first, then each value + * in block iteration order. This means {@code [1, 2]} and {@code [2, 1]} produce different keys. + * Null positions are encoded as a value count of zero. + */ +public class PositionKeyEncoder { + + private final int[] groupChannels; + private final ElementType[] elementTypes; + private final BytesRefBuilder scratch = new BytesRefBuilder(); + private final BytesRef scratchBytesRef = new BytesRef(); + + public PositionKeyEncoder(int[] groupChannels, List elementTypes) { + this.groupChannels = groupChannels; + this.elementTypes = new ElementType[groupChannels.length]; + for (int i = 0; i < groupChannels.length; i++) { + this.elementTypes[i] = elementTypes.get(groupChannels[i]); + } + } + + /** + * Encode the group key for the given position from the page into a {@link BytesRef}. + * The returned reference is only valid until the next call to {@code encode}. + */ + public BytesRef encode(Page page, int position) { + scratch.clear(); + for (int i = 0; i < groupChannels.length; i++) { + Block block = page.getBlock(groupChannels[i]); + encodeBlock(block, elementTypes[i], position); + } + return scratch.get(); + } + + private void encodeBlock(Block block, ElementType type, int position) { + if (block.isNull(position)) { + writeVInt(0); + return; + } + int firstValueIndex = block.getFirstValueIndex(position); + int valueCount = block.getValueCount(position); + writeVInt(valueCount); + switch (type) { + case INT -> { + IntBlock b = (IntBlock) block; + for (int v = 0; v < valueCount; v++) { + writeInt(b.getInt(firstValueIndex + v)); + } + } + case LONG -> { + LongBlock b = (LongBlock) block; + for (int v = 0; v < valueCount; v++) { + writeLong(b.getLong(firstValueIndex + v)); + } + } + case DOUBLE -> { + DoubleBlock b = (DoubleBlock) block; + for (int v = 0; v < valueCount; v++) { + writeLong(Double.doubleToLongBits(b.getDouble(firstValueIndex + v))); + } + } + case FLOAT -> { + FloatBlock b = (FloatBlock) block; + for (int v = 0; v < valueCount; v++) { + writeInt(Float.floatToIntBits(b.getFloat(firstValueIndex + v))); + } + } + case BOOLEAN -> { + BooleanBlock b = (BooleanBlock) block; + for (int v = 0; v < valueCount; v++) { + scratch.append((byte) (b.getBoolean(firstValueIndex + v) ? 1 : 0)); + } + } + case BYTES_REF -> { + BytesRefBlock b = (BytesRefBlock) block; + for (int v = 0; v < valueCount; v++) { + BytesRef ref = b.getBytesRef(firstValueIndex + v, scratchBytesRef); + writeVInt(ref.length); + scratch.append(ref.bytes, ref.offset, ref.length); + } + } + case NULL -> { + // already handled by isNull above; nothing extra to write + } + default -> throw new IllegalArgumentException("unsupported element type for group key encoding: " + type); + } + } + + private void writeVInt(int value) { + while ((value & ~0x7F) != 0) { + scratch.append((byte) ((value & 0x7F) | 0x80)); + value >>>= 7; + } + scratch.append((byte) value); + } + + private void writeInt(int value) { + scratch.append((byte) (value >> 24)); + scratch.append((byte) (value >> 16)); + scratch.append((byte) (value >> 8)); + scratch.append((byte) value); + } + + private void writeLong(long value) { + writeInt((int) (value >> 32)); + writeInt((int) value); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java new file mode 100644 index 0000000000000..3ca37518f80f6 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java @@ -0,0 +1,265 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.test.OperatorTestCase; +import org.elasticsearch.compute.test.operator.blocksource.SequenceLongBlockSourceOperator; +import org.hamcrest.Matcher; + +import java.util.List; +import java.util.Map; +import java.util.stream.LongStream; + +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.sameInstance; + +public class GroupedLimitOperatorTests extends OperatorTestCase { + + @Override + protected GroupedLimitOperator.Factory simple(SimpleOptions options) { + return new GroupedLimitOperator.Factory(100, List.of(0), List.of(ElementType.LONG)); + } + + @Override + protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { + return new SequenceLongBlockSourceOperator(blockFactory, LongStream.range(0, size)); + } + + @Override + protected Matcher expectedDescriptionOfSimple() { + return equalTo("GroupedLimitOperator[limit = 100]"); + } + + @Override + protected Matcher expectedToStringOfSimple() { + return equalTo("GroupedLimitOperator[limit = 100, groups = 0]"); + } + + @Override + protected void assertSimpleOutput(List input, List results) { + int inputPositionCount = input.stream().mapToInt(Page::getPositionCount).sum(); + int outputPositionCount = results.stream().mapToInt(Page::getPositionCount).sum(); + assertThat(outputPositionCount, equalTo(inputPositionCount)); + } + + public void testStatus() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (GroupedLimitOperator op = simple(SimpleOptions.DEFAULT).get(ctx)) { + GroupedLimitOperator.Status status = op.status(); + assertThat(status.pagesProcessed(), equalTo(0)); + assertThat(status.rowsReceived(), equalTo(0L)); + assertThat(status.rowsEmitted(), equalTo(0L)); + + Page p = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 1 }, 3).asBlock()); + op.addInput(p); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(3)); + } finally { + output.releaseBlocks(); + } + + status = op.status(); + assertThat(status.pagesProcessed(), equalTo(1)); + assertThat(status.rowsReceived(), equalTo(3L)); + assertThat(status.rowsEmitted(), equalTo(3L)); + } + } + + public void testNeedInput() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (GroupedLimitOperator op = simple(SimpleOptions.DEFAULT).get(ctx)) { + assertTrue(op.needsInput()); + Page p = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 3 }, 3).asBlock()); + op.addInput(p); + assertFalse(op.needsInput()); + op.getOutput().releaseBlocks(); + assertTrue(op.needsInput()); + op.finish(); + assertFalse(op.needsInput()); + } + } + + /** + * With limit=2, group 1 appears 3 times and group 2 appears 2 times. + * Only the first 2 rows of group 1 should be retained. + */ + public void testLimitPerGroupSinglePage() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 2, + new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + Page p = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 1, 1, 2 }, 5).asBlock()); + op.addInput(p); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(4)); + LongBlock resultBlock = output.getBlock(0); + assertThat(resultBlock.getLong(0), equalTo(1L)); + assertThat(resultBlock.getLong(1), equalTo(2L)); + assertThat(resultBlock.getLong(2), equalTo(1L)); + assertThat(resultBlock.getLong(3), equalTo(2L)); + } finally { + output.releaseBlocks(); + } + } + } + + /** + * Group limits are enforced across page boundaries: a group saturated + * in the first page rejects further rows in subsequent pages. + */ + public void testLimitPerGroupAcrossPages() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 2, + new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + Page p1 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 1, 2 }, 3).asBlock()); + op.addInput(p1); + Page out1 = op.getOutput(); + try { + assertThat(out1.getPositionCount(), equalTo(3)); + } finally { + out1.releaseBlocks(); + } + + Page p2 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 2 }, 3).asBlock()); + op.addInput(p2); + Page out2 = op.getOutput(); + try { + assertThat(out2.getPositionCount(), equalTo(1)); + LongBlock b = out2.getBlock(0); + assertThat(b.getLong(0), equalTo(2L)); + } finally { + out2.releaseBlocks(); + } + } + } + + /** + * When every row in a page belongs to an already-saturated group, + * the entire page is dropped and getOutput returns null. + */ + public void testEntirePageFiltered() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + Page p1 = new Page(blockFactory.newLongArrayVector(new long[] { 1 }, 1).asBlock()); + op.addInput(p1); + Page out1 = op.getOutput(); + try { + assertThat(out1.getPositionCount(), equalTo(1)); + } finally { + out1.releaseBlocks(); + } + + Page p2 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 1, 1 }, 3).asBlock()); + op.addInput(p2); + assertThat(op.getOutput(), nullValue()); + } + } + + public void testPagePassesThroughWhenAllAccepted() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 10, + new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + Page p = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 3 }, 3).asBlock()); + op.addInput(p); + Page output = op.getOutput(); + try { + assertThat(output, sameInstance(p)); + } finally { + output.releaseBlocks(); + } + } + } + + /** + * Groups are determined by composite keys across multiple channels. + * (1,10), (1,20), and (2,10) are three distinct groups. + */ + public void testMultipleGroupChannels() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + new PositionKeyEncoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.LONG)), + blockFactory + ) + ) { + Page p = new Page( + blockFactory.newLongArrayVector(new long[] { 1, 1, 1, 2 }, 4).asBlock(), + blockFactory.newLongArrayVector(new long[] { 10, 20, 10, 10 }, 4).asBlock() + ); + op.addInput(p); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(3)); + LongBlock col0 = output.getBlock(0); + LongBlock col1 = output.getBlock(1); + assertThat(col0.getLong(0), equalTo(1L)); + assertThat(col1.getLong(0), equalTo(10L)); + assertThat(col0.getLong(1), equalTo(1L)); + assertThat(col1.getLong(1), equalTo(20L)); + assertThat(col0.getLong(2), equalTo(2L)); + assertThat(col1.getLong(2), equalTo(10L)); + } finally { + output.releaseBlocks(); + } + } + } + + @Override + protected void assertStatus(Map map, List input, List output) { + var emittedRows = output.stream().mapToInt(Page::getPositionCount).sum(); + var inputRows = input.stream().mapToInt(Page::getPositionCount).sum(); + + var mapMatcher = matchesMap().entry("limit_per_group", 100) + .entry("group_count", greaterThanOrEqualTo(0)) + .entry("pages_processed", output.size()) + .entry("rows_received", allOf(greaterThanOrEqualTo(emittedRows), lessThanOrEqualTo(inputRows))) + .entry("rows_emitted", emittedRows); + + assertMap(map, mapMatcher); + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java index 6d57f3010df73..6695cf72385d5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java @@ -6,22 +6,29 @@ */ package org.elasticsearch.xpack.esql.plan.logical; +import org.elasticsearch.TransportVersion; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.capabilities.TelemetryAware; +import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import java.io.IOException; +import java.util.List; import java.util.Objects; public class Limit extends UnaryPlan implements TelemetryAware, PipelineBreaker, ExecutesOn { + public static final TransportVersion ESQL_LIMIT_BY = TransportVersion.fromName("esql_limit_by"); + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Limit", Limit::new); private final Expression limit; + + private List groupings; /** * Important for optimizations. This should be {@code false} in most cases, which allows this instance to be duplicated past a child * plan node that increases the number of rows, like for LOOKUP JOIN and MV_EXPAND. @@ -43,11 +50,24 @@ public Limit(Source source, Expression limit, LogicalPlan child) { this(source, limit, child, false, false); } - public Limit(Source source, Expression limit, LogicalPlan child, boolean duplicated, boolean local) { + /** + * Create a new instance with groupings, which are the expressions used in LIMIT BY. This sets {@link Limit#duplicated} + * and {@link Limit#local} to {@code false}. + */ + public Limit(Source source, Expression limit, LogicalPlan child, List groupings) { + this(source, limit, child, groupings, false, false); + } + + public Limit(Source source, Expression limit, LogicalPlan child, List groupings, boolean duplicated, boolean local) { super(source, child); this.limit = limit; this.duplicated = duplicated; this.local = local; + this.groupings = groupings; + } + + public Limit(Source source, Expression limit, LogicalPlan child, boolean duplicated, boolean local) { + this(source, limit, child, List.of(), duplicated, local); } /** @@ -58,9 +78,14 @@ private Limit(StreamInput in) throws IOException { Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(LogicalPlan.class), + List.of(), false, false ); + + if (in.getTransportVersion().supports(ESQL_LIMIT_BY)) { + this.groupings = in.readNamedWriteableCollectionAsList(Expression.class); + } } /** @@ -73,6 +98,12 @@ public void writeTo(StreamOutput out) throws IOException { Source.EMPTY.writeTo(out); out.writeNamedWriteable(limit()); out.writeNamedWriteable(child()); + + if (out.getTransportVersion().supports(ESQL_LIMIT_BY)) { + out.writeNamedWriteableCollection(groupings()); + } else if (groupings.isEmpty() == false) { + throw new IllegalArgumentException("LIMIT BY is not supported by all nodes in the cluster"); + } } @Override @@ -82,20 +113,24 @@ public String getWriteableName() { @Override protected NodeInfo info() { - return NodeInfo.create(this, Limit::new, limit, child(), duplicated, local); + return NodeInfo.create(this, Limit::new, limit, child(), groupings, duplicated, local); } @Override public Limit replaceChild(LogicalPlan newChild) { - return new Limit(source(), limit, newChild, duplicated, local); + return new Limit(source(), limit, newChild, groupings, duplicated, local); } public Expression limit() { return limit; } + public List groupings() { + return groupings; + } + public Limit withLimit(Expression limit) { - return new Limit(source(), limit, child(), duplicated, local); + return new Limit(source(), limit, child(), groupings, duplicated, local); } public boolean duplicated() { @@ -107,21 +142,21 @@ public boolean local() { } public Limit withDuplicated(boolean duplicated) { - return new Limit(source(), limit, child(), duplicated, local); + return new Limit(source(), limit, child(), groupings, duplicated, local); } public Limit withLocal(boolean newLocal) { - return new Limit(source(), limit, child(), duplicated, newLocal); + return new Limit(source(), limit, child(), groupings, duplicated, newLocal); } @Override public boolean expressionsResolved() { - return limit.resolved(); + return limit.resolved() && Resolvables.resolved(groupings); } @Override public int hashCode() { - return Objects.hash(limit, child(), duplicated, local); + return Objects.hash(limit, child(), duplicated, local, groupings); } @Override @@ -138,7 +173,8 @@ public boolean equals(Object obj) { return Objects.equals(limit, other.limit) && Objects.equals(child(), other.child()) && (duplicated == other.duplicated) - && (local == other.local); + && (local == other.local) + && Objects.equals(groupings, other.groupings); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LimitExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LimitExec.java index 0619a3b2fd77d..4e8eab99de488 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LimitExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LimitExec.java @@ -22,6 +22,8 @@ import java.util.List; import java.util.Objects; +import static org.elasticsearch.xpack.esql.plan.logical.Limit.ESQL_LIMIT_BY; + public class LimitExec extends UnaryExec implements EstimatesRowSize { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( PhysicalPlan.class, @@ -33,20 +35,31 @@ public class LimitExec extends UnaryExec implements EstimatesRowSize { private final Expression limit; private final Integer estimatedRowSize; + private List groupings; - public LimitExec(Source source, PhysicalPlan child, Expression limit, Integer estimatedRowSize) { + public LimitExec(Source source, PhysicalPlan child, Expression limit, List groupings, Integer estimatedRowSize) { super(source, child); this.limit = limit; + this.groupings = groupings; this.estimatedRowSize = estimatedRowSize; } + public LimitExec(Source source, PhysicalPlan child, Expression limit, Integer estimatedRowSize) { + this(source, child, limit, List.of(), estimatedRowSize); + } + private LimitExec(StreamInput in) throws IOException { this( Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(PhysicalPlan.class), in.readNamedWriteable(Expression.class), + List.of(), in.getTransportVersion().supports(ESQL_LIMIT_ROW_SIZE) ? in.readOptionalVInt() : null ); + + if (in.getTransportVersion().supports(ESQL_LIMIT_BY)) { + this.groupings = in.readNamedWriteableCollectionAsList(Expression.class); + } } @Override @@ -57,6 +70,12 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().supports(ESQL_LIMIT_ROW_SIZE)) { out.writeOptionalVInt(estimatedRowSize); } + + if (out.getTransportVersion().supports(ESQL_LIMIT_BY)) { + out.writeNamedWriteableCollection(groupings()); + } else if (groupings.isEmpty() == false) { + throw new IllegalArgumentException("LIMIT BY is not supported by all nodes in the cluster"); + } } @Override @@ -66,18 +85,22 @@ public String getWriteableName() { @Override protected NodeInfo info() { - return NodeInfo.create(this, LimitExec::new, child(), limit, estimatedRowSize); + return NodeInfo.create(this, LimitExec::new, child(), limit, groupings, estimatedRowSize); } @Override public LimitExec replaceChild(PhysicalPlan newChild) { - return new LimitExec(source(), newChild, limit, estimatedRowSize); + return new LimitExec(source(), newChild, limit, groupings, estimatedRowSize); } public Expression limit() { return limit; } + public List groupings() { + return groupings; + } + public Integer estimatedRowSize() { return estimatedRowSize; } @@ -90,12 +113,12 @@ public PhysicalPlan estimateRowSize(State unused) { state.add(needsSortedDocIds, output); int size = state.consumeAllFields(true); size = Math.max(size, 1); - return Objects.equals(this.estimatedRowSize, size) ? this : new LimitExec(source(), child(), limit, size); + return Objects.equals(this.estimatedRowSize, size) ? this : new LimitExec(source(), child(), limit, groupings, size); } @Override public int hashCode() { - return Objects.hash(limit, estimatedRowSize, child()); + return Objects.hash(limit, groupings, estimatedRowSize, child()); } @Override @@ -110,6 +133,7 @@ public boolean equals(Object obj) { LimitExec other = (LimitExec) obj; return Objects.equals(limit, other.limit) + && Objects.equals(groupings, other.groupings) && Objects.equals(estimatedRowSize, other.estimatedRowSize) && Objects.equals(child(), other.child()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index c8f73258a5ab8..9bdfc76644860 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -38,6 +38,7 @@ import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.compute.operator.EvalOperator.EvalOperatorFactory; import org.elasticsearch.compute.operator.FilterOperator.FilterOperatorFactory; +import org.elasticsearch.compute.operator.GroupedLimitOperator; import org.elasticsearch.compute.operator.LimitOperator; import org.elasticsearch.compute.operator.LocalSourceOperator; import org.elasticsearch.compute.operator.LocalSourceOperator.LocalSourceFactory; @@ -604,6 +605,14 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte ); } + private static int getAttributeChannel(Expression expression, Layout layout, String errMessage) { + if (expression instanceof Attribute a) { + return layout.get(a.id()).channel(); + } else { + throw new EsqlIllegalArgumentException(errMessage); + } + } + private static MappedFieldType.FieldExtractPreference fieldExtractPreference(TopNExec topNExec, Set nameIds) { MappedFieldType.FieldExtractPreference fieldExtractPreference = MappedFieldType.FieldExtractPreference.NONE; // See if any of the NameIds is marked as having been loaded with doc-values preferences, which will affect the ElementType chosen. @@ -1283,7 +1292,21 @@ private PhysicalOperation planFilter(FilterExec filter, LocalExecutionPlannerCon private PhysicalOperation planLimit(LimitExec limit, LocalExecutionPlannerContext context) { PhysicalOperation source = plan(limit.child(), context); - return source.with(new LimitOperator.Factory((Integer) limit.limit().fold(context.foldCtx)), source.layout); + int limitValue = (Integer) limit.limit().fold(context.foldCtx); + if (limit.groupings().isEmpty()) { + return source.with(new LimitOperator.Factory(limitValue), source.layout); + } + Layout layout = source.layout; + List groupKeys = limit.groupings() + .stream() + .map(g -> getAttributeChannel(g, layout, "expression in LIMIT BY must be an attribute")) + .toList(); + List inverse = layout.inverse(); + List elementTypes = new ArrayList<>(layout.numberOfChannels()); + for (int channel = 0; channel < inverse.size(); channel++) { + elementTypes.add(PlannerUtils.toElementType(inverse.get(channel).type())); + } + return source.with(new GroupedLimitOperator.Factory(limitValue, groupKeys, elementTypes), source.layout); } private PhysicalOperation planMvExpand(MvExpandExec mvExpandExec, LocalExecutionPlannerContext context) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java index d3065af7172aa..6e761712819a8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java @@ -86,7 +86,7 @@ private PhysicalPlan mapUnary(UnaryPlan unary) { } if (unary instanceof Limit limit) { - return new LimitExec(limit.source(), mappedChild, limit.limit(), null); + return new LimitExec(limit.source(), mappedChild, limit.limit(), limit.groupings(), null); } if (unary instanceof TopN topN) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java index 251ab9ac317e9..135e3dae9e69a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java @@ -137,7 +137,7 @@ else if (aggregate.groupings() if (unary instanceof Limit limit) { mappedChild = addExchangeForFragment(limit, mappedChild); - return new LimitExec(limit.source(), mappedChild, limit.limit(), null); + return new LimitExec(limit.source(), mappedChild, limit.limit(), limit.groupings(), null); } if (unary instanceof TopN topN) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index 1725048bbd450..b36a92075c0e2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -29,6 +29,7 @@ import org.elasticsearch.compute.operator.AggregationOperator; import org.elasticsearch.compute.operator.AsyncOperator; import org.elasticsearch.compute.operator.DriverStatus; +import org.elasticsearch.compute.operator.GroupedLimitOperator; import org.elasticsearch.compute.operator.HashAggregationOperator; import org.elasticsearch.compute.operator.LimitOperator; import org.elasticsearch.compute.operator.MMROperator; @@ -406,6 +407,7 @@ public List getNamedWriteables() { entries.add(org.elasticsearch.xpack.esql.datasources.AsyncExternalSourceOperator.Status.ENTRY); entries.add(HashAggregationOperator.Status.ENTRY); entries.add(LimitOperator.Status.ENTRY); + entries.add(GroupedLimitOperator.Status.ENTRY); entries.add(LuceneOperator.Status.ENTRY); entries.add(TopNOperatorStatus.ENTRY); entries.add(MvExpandOperator.Status.ENTRY); From 03d0830917f5cd1b8ec749bcced83a313f97c7a6 Mon Sep 17 00:00:00 2001 From: ncordon Date: Tue, 3 Mar 2026 13:08:19 +0100 Subject: [PATCH 02/14] Adds serialization tests for LIMIT with groupings --- .../plan/logical/LimitSerializationTests.java | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java index d762416db64d8..21710983c2143 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java @@ -13,6 +13,7 @@ import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; import java.io.IOException; +import java.util.List; public class LimitSerializationTests extends AbstractLogicalPlanSerializationTests { @Override @@ -20,23 +21,30 @@ protected Limit createTestInstance() { Source source = randomSource(); Expression limit = FieldAttributeTests.createFieldAttribute(0, false); LogicalPlan child = randomChild(0); - return new Limit(source, limit, child, randomBoolean(), randomBoolean()); + List groupings = randomGroupings(); + return new Limit(source, limit, child, groupings, randomBoolean(), randomBoolean()); } @Override protected Limit mutateInstance(Limit instance) throws IOException { Expression limit = instance.limit(); LogicalPlan child = instance.child(); + List groupings = instance.groupings(); boolean duplicated = instance.duplicated(); boolean local = instance.local(); - switch (randomIntBetween(0, 3)) { + switch (randomIntBetween(0, 4)) { case 0 -> limit = randomValueOtherThan(limit, () -> FieldAttributeTests.createFieldAttribute(0, false)); case 1 -> child = randomValueOtherThan(child, () -> randomChild(0)); - case 2 -> duplicated = duplicated == false; - case 3 -> local = local == false; + case 2 -> groupings = randomValueOtherThan(groupings, LimitSerializationTests::randomGroupings); + case 3 -> duplicated = duplicated == false; + case 4 -> local = local == false; default -> throw new IllegalStateException("Should never reach here"); } - return new Limit(instance.source(), limit, child, duplicated, local); + return new Limit(instance.source(), limit, child, groupings, duplicated, local); + } + + private static List randomGroupings() { + return randomList(0, 3, () -> FieldAttributeTests.createFieldAttribute(0, false)); } @Override From 57a679849289dda87c756ad0d859f682b054455c Mon Sep 17 00:00:00 2001 From: ncordon Date: Tue, 3 Mar 2026 13:45:14 +0100 Subject: [PATCH 03/14] Adds more tests and tweaks --- .../operator/GroupedLimitOperator.java | 28 +- .../GroupedLimitOperatorStatusTests.java | 58 ++++ .../operator/PositionKeyEncoderTests.java | 250 ++++++++++++++++++ 3 files changed, 326 insertions(+), 10 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorStatusTests.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java index 23244dd50da85..2ce8a81c1efdb 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -35,7 +35,7 @@ */ public class GroupedLimitOperator implements Operator { - private final int limit; + private final int limitPerGroup; private final PositionKeyEncoder keyEncoder; private final BigArrays bigArrays; private final BytesRefHashTable seenKeys; @@ -49,7 +49,7 @@ public class GroupedLimitOperator implements Operator { private boolean finished; public GroupedLimitOperator(int limitPerGroup, PositionKeyEncoder keyEncoder, BlockFactory blockFactory) { - this.limit = limitPerGroup; + this.limitPerGroup = limitPerGroup; this.keyEncoder = keyEncoder; this.bigArrays = blockFactory.bigArrays(); this.seenKeys = HashImplFactory.newBytesRefHash(blockFactory); @@ -57,24 +57,24 @@ public GroupedLimitOperator(int limitPerGroup, PositionKeyEncoder keyEncoder, Bl } public static final class Factory implements Operator.OperatorFactory { - private final int limit; + private final int limitPerGroup; private final int[] groupChannels; private final List elementTypes; - public Factory(int limit, List groupChannels, List elementTypes) { - this.limit = limit; + public Factory(int limitPerGroup, List groupChannels, List elementTypes) { + this.limitPerGroup = limitPerGroup; this.groupChannels = groupChannels.stream().mapToInt(Integer::intValue).toArray(); this.elementTypes = elementTypes; } @Override public GroupedLimitOperator get(DriverContext driverContext) { - return new GroupedLimitOperator(limit, new PositionKeyEncoder(groupChannels, elementTypes), driverContext.blockFactory()); + return new GroupedLimitOperator(limitPerGroup, new PositionKeyEncoder(groupChannels, elementTypes), driverContext.blockFactory()); } @Override public String describe() { - return "GroupedLimitOperator[limit = " + limit + "]"; + return "GroupedLimitOperator[limit = " + limitPerGroup + "]"; } } @@ -105,7 +105,7 @@ public void addInput(Page page) { ord = -(hashOrd + 1); count = counts.get(ord); } - if (count < limit) { + if (count < limitPerGroup) { counts.set(ord, count + 1); accepted[acceptedCount++] = pos; } @@ -180,7 +180,7 @@ public Page getOutput() { @Override public Status status() { - return new Status(limit, (int) seenKeys.size(), pagesProcessed, rowsReceived, rowsEmitted); + return new Status(limitPerGroup, (int) seenKeys.size(), pagesProcessed, rowsReceived, rowsEmitted); } @Override @@ -190,7 +190,7 @@ public void close() { @Override public String toString() { - return "GroupedLimitOperator[limit = " + limit + ", groups = " + seenKeys.size() + "]"; + return "GroupedLimitOperator[limit = " + limitPerGroup + ", groups = " + seenKeys.size() + "]"; } public static class Status implements Operator.Status { @@ -236,6 +236,14 @@ public String getWriteableName() { return ENTRY.name; } + public int limitPerGroup() { + return limitPerGroup; + } + + public int groupCount() { + return groupCount; + } + public int pagesProcessed() { return pagesProcessed; } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorStatusTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorStatusTests.java new file mode 100644 index 0000000000000..57d32f7dedeeb --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorStatusTests.java @@ -0,0 +1,58 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; + +import static org.hamcrest.Matchers.equalTo; + +public class GroupedLimitOperatorStatusTests extends AbstractWireSerializingTestCase { + public void testToXContent() { + assertThat(Strings.toString(new GroupedLimitOperator.Status(10, 5, 3, 111, 222)), equalTo(""" + {"limit_per_group":10,"group_count":5,"pages_processed":3,"rows_received":111,"rows_emitted":222}""")); + } + + @Override + protected Writeable.Reader instanceReader() { + return GroupedLimitOperator.Status::new; + } + + @Override + protected GroupedLimitOperator.Status createTestInstance() { + return new GroupedLimitOperator.Status( + randomNonNegativeInt(), + randomNonNegativeInt(), + randomNonNegativeInt(), + randomNonNegativeLong(), + randomNonNegativeLong() + ); + } + + @Override + protected GroupedLimitOperator.Status mutateInstance(GroupedLimitOperator.Status instance) throws IOException { + int limitPerGroup = instance.limitPerGroup(); + int groupCount = instance.groupCount(); + int pagesProcessed = instance.pagesProcessed(); + long rowsReceived = instance.rowsReceived(); + long rowsEmitted = instance.rowsEmitted(); + switch (between(0, 4)) { + case 0 -> limitPerGroup = randomValueOtherThan(limitPerGroup, ESTestCase::randomNonNegativeInt); + case 1 -> groupCount = randomValueOtherThan(groupCount, ESTestCase::randomNonNegativeInt); + case 2 -> pagesProcessed = randomValueOtherThan(pagesProcessed, ESTestCase::randomNonNegativeInt); + case 3 -> rowsReceived = randomValueOtherThan(rowsReceived, ESTestCase::randomNonNegativeLong); + case 4 -> rowsEmitted = randomValueOtherThan(rowsEmitted, ESTestCase::randomNonNegativeLong); + default -> throw new IllegalArgumentException(); + } + return new GroupedLimitOperator.Status(limitPerGroup, groupCount, pagesProcessed, rowsReceived, rowsEmitted); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java new file mode 100644 index 0000000000000..948969b7aa25d --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java @@ -0,0 +1,250 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.operator; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.test.ComputeTestCase; + +import java.util.List; + +public class PositionKeyEncoderTests extends ComputeTestCase { + + public void testSameIntValuesSameKey() { + BlockFactory bf = blockFactory(); + Page page = new Page(bf.newIntArrayVector(new int[] { 7, 7 }, 2).asBlock()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.INT)); + assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); + } finally { + page.releaseBlocks(); + } + } + + public void testDifferentIntValuesDifferentKeys() { + BlockFactory bf = blockFactory(); + Page page = new Page(bf.newIntArrayVector(new int[] { 1, 2 }, 2).asBlock()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.INT)); + assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); + } finally { + page.releaseBlocks(); + } + } + + public void testLongType() { + BlockFactory bf = blockFactory(); + Page page = new Page(bf.newLongArrayVector(new long[] { 100, 200, 100 }, 3).asBlock()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + BytesRef key0 = copy(encoder.encode(page, 0)); + BytesRef key1 = copy(encoder.encode(page, 1)); + BytesRef key2 = copy(encoder.encode(page, 2)); + assertEquals(key0, key2); + assertNotEquals(key0, key1); + } finally { + page.releaseBlocks(); + } + } + + public void testDoubleType() { + BlockFactory bf = blockFactory(); + Page page = new Page(bf.newDoubleArrayVector(new double[] { 1.5, 2.5, 1.5 }, 3).asBlock()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.DOUBLE)); + BytesRef key0 = copy(encoder.encode(page, 0)); + BytesRef key1 = copy(encoder.encode(page, 1)); + BytesRef key2 = copy(encoder.encode(page, 2)); + assertEquals(key0, key2); + assertNotEquals(key0, key1); + } finally { + page.releaseBlocks(); + } + } + + public void testFloatType() { + BlockFactory bf = blockFactory(); + Page page = new Page(bf.newFloatArrayVector(new float[] { 1.5f, 2.5f, 1.5f }, 3).asBlock()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.FLOAT)); + BytesRef key0 = copy(encoder.encode(page, 0)); + BytesRef key1 = copy(encoder.encode(page, 1)); + BytesRef key2 = copy(encoder.encode(page, 2)); + assertEquals(key0, key2); + assertNotEquals(key0, key1); + } finally { + page.releaseBlocks(); + } + } + + public void testBooleanType() { + BlockFactory bf = blockFactory(); + try (var builder = bf.newBooleanBlockBuilder(3)) { + builder.appendBoolean(true); + builder.appendBoolean(false); + builder.appendBoolean(true); + Page page = new Page(builder.build()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.BOOLEAN)); + BytesRef keyTrue = copy(encoder.encode(page, 0)); + BytesRef keyFalse = copy(encoder.encode(page, 1)); + BytesRef keyTrue2 = copy(encoder.encode(page, 2)); + assertEquals(keyTrue, keyTrue2); + assertNotEquals(keyTrue, keyFalse); + } finally { + page.releaseBlocks(); + } + } + } + + public void testBytesRefType() { + BlockFactory bf = blockFactory(); + try (var builder = bf.newBytesRefBlockBuilder(3)) { + builder.appendBytesRef(new BytesRef("hello")); + builder.appendBytesRef(new BytesRef("world")); + builder.appendBytesRef(new BytesRef("hello")); + Page page = new Page(builder.build()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.BYTES_REF)); + BytesRef key0 = copy(encoder.encode(page, 0)); + BytesRef key1 = copy(encoder.encode(page, 1)); + BytesRef key2 = copy(encoder.encode(page, 2)); + assertEquals(key0, key2); + assertNotEquals(key0, key1); + } finally { + page.releaseBlocks(); + } + } + } + + /** + * A null position must produce a key distinct from any non-null value, + * including zero / empty. + */ + public void testNullProducesDistinctKey() { + BlockFactory bf = blockFactory(); + try (var builder = bf.newLongBlockBuilder(2)) { + builder.appendNull(); + builder.appendLong(0); + Page page = new Page(builder.build()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); + } finally { + page.releaseBlocks(); + } + } + } + + /** + * {@code [1, 2]} and {@code [2, 1]} are different groups because + * multivalues use list (order-sensitive) semantics. + */ + public void testMultivalueOrderMatters() { + BlockFactory bf = blockFactory(); + try (var builder = bf.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendLong(2); + builder.appendLong(1); + builder.endPositionEntry(); + Page page = new Page(builder.build()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); + } finally { + page.releaseBlocks(); + } + } + } + + /** + * Two positions with the same multivalue list in the same order + * produce identical keys. + */ + public void testMultivalueSameOrderSameKey() { + BlockFactory bf = blockFactory(); + try (var builder = bf.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + Page page = new Page(builder.build()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); + } finally { + page.releaseBlocks(); + } + } + } + + /** + * Composite keys: (1, "a"), (1, "b"), and (2, "a") are all distinct groups. + */ + public void testCompositeKey() { + BlockFactory bf = blockFactory(); + try (var longBuilder = bf.newLongBlockBuilder(3); var bytesBuilder = bf.newBytesRefBlockBuilder(3)) { + longBuilder.appendLong(1); + longBuilder.appendLong(1); + longBuilder.appendLong(2); + bytesBuilder.appendBytesRef(new BytesRef("a")); + bytesBuilder.appendBytesRef(new BytesRef("b")); + bytesBuilder.appendBytesRef(new BytesRef("a")); + Page page = new Page(longBuilder.build(), bytesBuilder.build()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder( + new int[] { 0, 1 }, + List.of(ElementType.LONG, ElementType.BYTES_REF) + ); + BytesRef key0 = copy(encoder.encode(page, 0)); + BytesRef key1 = copy(encoder.encode(page, 1)); + BytesRef key2 = copy(encoder.encode(page, 2)); + assertNotEquals(key0, key1); + assertNotEquals(key0, key2); + assertNotEquals(key1, key2); + } finally { + page.releaseBlocks(); + } + } + } + + /** + * A single value and a multivalue with one element that matches + * should produce the same key (both have value count 1). + */ + public void testSingleValueMatchesOneElementMultivalue() { + BlockFactory bf = blockFactory(); + try (var builder = bf.newLongBlockBuilder(2)) { + builder.appendLong(42); + builder.beginPositionEntry(); + builder.appendLong(42); + builder.endPositionEntry(); + Page page = new Page(builder.build()); + try { + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); + } finally { + page.releaseBlocks(); + } + } + } + + private static BytesRef copy(BytesRef ref) { + return BytesRef.deepCopyOf(ref); + } +} From d7ed4e1fbce30ee61625bee979e9a4d8ce1d8e59 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 3 Mar 2026 12:52:58 +0000 Subject: [PATCH 04/14] [CI] Auto commit changes from spotless --- .../compute/operator/GroupedLimitOperator.java | 6 +++++- .../compute/operator/PositionKeyEncoderTests.java | 5 +---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java index 2ce8a81c1efdb..aa4b9f6083018 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -69,7 +69,11 @@ public Factory(int limitPerGroup, List groupChannels, List @Override public GroupedLimitOperator get(DriverContext driverContext) { - return new GroupedLimitOperator(limitPerGroup, new PositionKeyEncoder(groupChannels, elementTypes), driverContext.blockFactory()); + return new GroupedLimitOperator( + limitPerGroup, + new PositionKeyEncoder(groupChannels, elementTypes), + driverContext.blockFactory() + ); } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java index 948969b7aa25d..f504e931ca36c 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java @@ -207,10 +207,7 @@ public void testCompositeKey() { bytesBuilder.appendBytesRef(new BytesRef("a")); Page page = new Page(longBuilder.build(), bytesBuilder.build()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder( - new int[] { 0, 1 }, - List.of(ElementType.LONG, ElementType.BYTES_REF) - ); + PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.BYTES_REF)); BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); From b232ec066b5f41d0ecc67c487877ef4057da6618 Mon Sep 17 00:00:00 2001 From: ncordon Date: Tue, 3 Mar 2026 14:00:24 +0100 Subject: [PATCH 05/14] Adds transport version tests --- .../plan/logical/LimitSerializationTests.java | 22 ++++++++++ .../physical/LimitExecSerializationTests.java | 40 +++++++++++++++++-- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java index 21710983c2143..2349080d750cb 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.plan.logical; import org.elasticsearch.TransportVersion; +import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; @@ -60,4 +61,25 @@ protected Limit copyInstance(Limit instance, TransportVersion version) throws IO Limit deserializedCopy = super.copyInstance(instance, version); return deserializedCopy.withDuplicated(instance.duplicated()).withLocal(instance.local()); } + + public void testSerializationWithGroupingsOnOldVersion() { + TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(Limit.ESQL_LIMIT_BY); + Source source = randomSource(); + Expression limit = FieldAttributeTests.createFieldAttribute(0, false); + LogicalPlan child = randomChild(0); + List groupings = randomList(1, 3, () -> FieldAttributeTests.createFieldAttribute(0, false)); + Limit instance = new Limit(source, limit, child, groupings, false, false); + Exception e = expectThrows(IllegalArgumentException.class, () -> copyInstance(instance, oldVersion)); + assertEquals("LIMIT BY is not supported by all nodes in the cluster", e.getMessage()); + } + + public void testSerializationWithoutGroupingsOnOldVersion() throws IOException { + TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(Limit.ESQL_LIMIT_BY); + Source source = randomSource(); + Expression limit = FieldAttributeTests.createFieldAttribute(0, false); + LogicalPlan child = randomChild(0); + Limit instance = new Limit(source, limit, child, List.of(), false, false); + Limit deserialized = copyInstance(instance, oldVersion); + assertEquals(List.of(), deserialized.groupings()); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/LimitExecSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/LimitExecSerializationTests.java index 133f420fc3fc2..9ddf782067152 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/LimitExecSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/LimitExecSerializationTests.java @@ -7,25 +7,34 @@ package org.elasticsearch.xpack.esql.plan.physical; +import org.elasticsearch.TransportVersion; +import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; +import org.elasticsearch.xpack.esql.plan.logical.Limit; import java.io.IOException; +import java.util.List; public class LimitExecSerializationTests extends AbstractPhysicalPlanSerializationTests { public static LimitExec randomLimitExec(int depth) { Source source = randomSource(); PhysicalPlan child = randomChild(depth); Expression limit = randomLimit(); - return new LimitExec(source, child, limit, randomEstimatedRowSize()); + return new LimitExec(source, child, limit, randomGroupings(), randomEstimatedRowSize()); } private static Expression randomLimit() { return new Literal(randomSource(), between(0, Integer.MAX_VALUE), DataType.INTEGER); } + private static List randomGroupings() { + return randomList(0, 3, () -> FieldAttributeTests.createFieldAttribute(0, false)); + } + @Override protected LimitExec createTestInstance() { return randomLimitExec(0); @@ -35,18 +44,41 @@ protected LimitExec createTestInstance() { protected LimitExec mutateInstance(LimitExec instance) throws IOException { PhysicalPlan child = instance.child(); Expression limit = instance.limit(); + List groupings = instance.groupings(); Integer estimatedRowSize = instance.estimatedRowSize(); - switch (between(0, 2)) { + switch (between(0, 3)) { case 0 -> child = randomValueOtherThan(child, () -> randomChild(0)); case 1 -> limit = randomValueOtherThan(limit, LimitExecSerializationTests::randomLimit); - case 2 -> estimatedRowSize = randomValueOtherThan(estimatedRowSize, LimitExecSerializationTests::randomEstimatedRowSize); + case 2 -> groupings = randomValueOtherThan(groupings, LimitExecSerializationTests::randomGroupings); + case 3 -> estimatedRowSize = randomValueOtherThan(estimatedRowSize, LimitExecSerializationTests::randomEstimatedRowSize); default -> throw new AssertionError("Unexpected case"); } - return new LimitExec(instance.source(), child, limit, estimatedRowSize); + return new LimitExec(instance.source(), child, limit, groupings, estimatedRowSize); } @Override protected boolean alwaysEmptySource() { return true; } + + public void testSerializationWithGroupingsOnOldVersion() { + TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(Limit.ESQL_LIMIT_BY); + Source source = randomSource(); + PhysicalPlan child = randomChild(0); + Expression limit = randomLimit(); + List groupings = randomList(1, 3, () -> FieldAttributeTests.createFieldAttribute(0, false)); + LimitExec instance = new LimitExec(source, child, limit, groupings, randomEstimatedRowSize()); + Exception e = expectThrows(IllegalArgumentException.class, () -> copyInstance(instance, oldVersion)); + assertEquals("LIMIT BY is not supported by all nodes in the cluster", e.getMessage()); + } + + public void testSerializationWithoutGroupingsOnOldVersion() throws IOException { + TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(Limit.ESQL_LIMIT_BY); + Source source = randomSource(); + PhysicalPlan child = randomChild(0); + Expression limit = randomLimit(); + LimitExec instance = new LimitExec(source, child, limit, randomEstimatedRowSize()); + LimitExec deserialized = copyInstance(instance, oldVersion); + assertEquals(List.of(), deserialized.groupings()); + } } From 6a24032d608ee8bd458c067345bf6929559dd9ff Mon Sep 17 00:00:00 2001 From: ncordon Date: Tue, 3 Mar 2026 15:50:42 +0100 Subject: [PATCH 06/14] Undoes any logical or physical plan part --- .../definitions/referable/esql_limit_by.csv | 1 - .../resources/transport/upper_bounds/9.4.csv | 2 +- .../xpack/esql/plan/logical/Limit.java | 54 ++++--------------- .../xpack/esql/plan/physical/LimitExec.java | 34 ++---------- .../esql/planner/LocalExecutionPlanner.java | 25 +-------- .../esql/planner/mapper/LocalMapper.java | 2 +- .../xpack/esql/planner/mapper/Mapper.java | 2 +- .../xpack/esql/plugin/EsqlPlugin.java | 2 - .../plan/logical/LimitSerializationTests.java | 40 ++------------ .../physical/LimitExecSerializationTests.java | 40 ++------------ 10 files changed, 27 insertions(+), 175 deletions(-) delete mode 100644 server/src/main/resources/transport/definitions/referable/esql_limit_by.csv diff --git a/server/src/main/resources/transport/definitions/referable/esql_limit_by.csv b/server/src/main/resources/transport/definitions/referable/esql_limit_by.csv deleted file mode 100644 index c988fdc9fe987..0000000000000 --- a/server/src/main/resources/transport/definitions/referable/esql_limit_by.csv +++ /dev/null @@ -1 +0,0 @@ -9304000 diff --git a/server/src/main/resources/transport/upper_bounds/9.4.csv b/server/src/main/resources/transport/upper_bounds/9.4.csv index 5a21cc3ebd006..c60446d500473 100644 --- a/server/src/main/resources/transport/upper_bounds/9.4.csv +++ b/server/src/main/resources/transport/upper_bounds/9.4.csv @@ -1 +1 @@ -esql_limit_by,9304000 +query_dsl_boxplot_exponential_histogram_support,9303000 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java index 6695cf72385d5..6d57f3010df73 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Limit.java @@ -6,29 +6,22 @@ */ package org.elasticsearch.xpack.esql.plan.logical; -import org.elasticsearch.TransportVersion; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.capabilities.TelemetryAware; -import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import java.io.IOException; -import java.util.List; import java.util.Objects; public class Limit extends UnaryPlan implements TelemetryAware, PipelineBreaker, ExecutesOn { - public static final TransportVersion ESQL_LIMIT_BY = TransportVersion.fromName("esql_limit_by"); - public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Limit", Limit::new); private final Expression limit; - - private List groupings; /** * Important for optimizations. This should be {@code false} in most cases, which allows this instance to be duplicated past a child * plan node that increases the number of rows, like for LOOKUP JOIN and MV_EXPAND. @@ -50,24 +43,11 @@ public Limit(Source source, Expression limit, LogicalPlan child) { this(source, limit, child, false, false); } - /** - * Create a new instance with groupings, which are the expressions used in LIMIT BY. This sets {@link Limit#duplicated} - * and {@link Limit#local} to {@code false}. - */ - public Limit(Source source, Expression limit, LogicalPlan child, List groupings) { - this(source, limit, child, groupings, false, false); - } - - public Limit(Source source, Expression limit, LogicalPlan child, List groupings, boolean duplicated, boolean local) { + public Limit(Source source, Expression limit, LogicalPlan child, boolean duplicated, boolean local) { super(source, child); this.limit = limit; this.duplicated = duplicated; this.local = local; - this.groupings = groupings; - } - - public Limit(Source source, Expression limit, LogicalPlan child, boolean duplicated, boolean local) { - this(source, limit, child, List.of(), duplicated, local); } /** @@ -78,14 +58,9 @@ private Limit(StreamInput in) throws IOException { Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(LogicalPlan.class), - List.of(), false, false ); - - if (in.getTransportVersion().supports(ESQL_LIMIT_BY)) { - this.groupings = in.readNamedWriteableCollectionAsList(Expression.class); - } } /** @@ -98,12 +73,6 @@ public void writeTo(StreamOutput out) throws IOException { Source.EMPTY.writeTo(out); out.writeNamedWriteable(limit()); out.writeNamedWriteable(child()); - - if (out.getTransportVersion().supports(ESQL_LIMIT_BY)) { - out.writeNamedWriteableCollection(groupings()); - } else if (groupings.isEmpty() == false) { - throw new IllegalArgumentException("LIMIT BY is not supported by all nodes in the cluster"); - } } @Override @@ -113,24 +82,20 @@ public String getWriteableName() { @Override protected NodeInfo info() { - return NodeInfo.create(this, Limit::new, limit, child(), groupings, duplicated, local); + return NodeInfo.create(this, Limit::new, limit, child(), duplicated, local); } @Override public Limit replaceChild(LogicalPlan newChild) { - return new Limit(source(), limit, newChild, groupings, duplicated, local); + return new Limit(source(), limit, newChild, duplicated, local); } public Expression limit() { return limit; } - public List groupings() { - return groupings; - } - public Limit withLimit(Expression limit) { - return new Limit(source(), limit, child(), groupings, duplicated, local); + return new Limit(source(), limit, child(), duplicated, local); } public boolean duplicated() { @@ -142,21 +107,21 @@ public boolean local() { } public Limit withDuplicated(boolean duplicated) { - return new Limit(source(), limit, child(), groupings, duplicated, local); + return new Limit(source(), limit, child(), duplicated, local); } public Limit withLocal(boolean newLocal) { - return new Limit(source(), limit, child(), groupings, duplicated, newLocal); + return new Limit(source(), limit, child(), duplicated, newLocal); } @Override public boolean expressionsResolved() { - return limit.resolved() && Resolvables.resolved(groupings); + return limit.resolved(); } @Override public int hashCode() { - return Objects.hash(limit, child(), duplicated, local, groupings); + return Objects.hash(limit, child(), duplicated, local); } @Override @@ -173,8 +138,7 @@ public boolean equals(Object obj) { return Objects.equals(limit, other.limit) && Objects.equals(child(), other.child()) && (duplicated == other.duplicated) - && (local == other.local) - && Objects.equals(groupings, other.groupings); + && (local == other.local); } @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LimitExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LimitExec.java index 4e8eab99de488..0619a3b2fd77d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LimitExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LimitExec.java @@ -22,8 +22,6 @@ import java.util.List; import java.util.Objects; -import static org.elasticsearch.xpack.esql.plan.logical.Limit.ESQL_LIMIT_BY; - public class LimitExec extends UnaryExec implements EstimatesRowSize { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( PhysicalPlan.class, @@ -35,31 +33,20 @@ public class LimitExec extends UnaryExec implements EstimatesRowSize { private final Expression limit; private final Integer estimatedRowSize; - private List groupings; - public LimitExec(Source source, PhysicalPlan child, Expression limit, List groupings, Integer estimatedRowSize) { + public LimitExec(Source source, PhysicalPlan child, Expression limit, Integer estimatedRowSize) { super(source, child); this.limit = limit; - this.groupings = groupings; this.estimatedRowSize = estimatedRowSize; } - public LimitExec(Source source, PhysicalPlan child, Expression limit, Integer estimatedRowSize) { - this(source, child, limit, List.of(), estimatedRowSize); - } - private LimitExec(StreamInput in) throws IOException { this( Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(PhysicalPlan.class), in.readNamedWriteable(Expression.class), - List.of(), in.getTransportVersion().supports(ESQL_LIMIT_ROW_SIZE) ? in.readOptionalVInt() : null ); - - if (in.getTransportVersion().supports(ESQL_LIMIT_BY)) { - this.groupings = in.readNamedWriteableCollectionAsList(Expression.class); - } } @Override @@ -70,12 +57,6 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().supports(ESQL_LIMIT_ROW_SIZE)) { out.writeOptionalVInt(estimatedRowSize); } - - if (out.getTransportVersion().supports(ESQL_LIMIT_BY)) { - out.writeNamedWriteableCollection(groupings()); - } else if (groupings.isEmpty() == false) { - throw new IllegalArgumentException("LIMIT BY is not supported by all nodes in the cluster"); - } } @Override @@ -85,22 +66,18 @@ public String getWriteableName() { @Override protected NodeInfo info() { - return NodeInfo.create(this, LimitExec::new, child(), limit, groupings, estimatedRowSize); + return NodeInfo.create(this, LimitExec::new, child(), limit, estimatedRowSize); } @Override public LimitExec replaceChild(PhysicalPlan newChild) { - return new LimitExec(source(), newChild, limit, groupings, estimatedRowSize); + return new LimitExec(source(), newChild, limit, estimatedRowSize); } public Expression limit() { return limit; } - public List groupings() { - return groupings; - } - public Integer estimatedRowSize() { return estimatedRowSize; } @@ -113,12 +90,12 @@ public PhysicalPlan estimateRowSize(State unused) { state.add(needsSortedDocIds, output); int size = state.consumeAllFields(true); size = Math.max(size, 1); - return Objects.equals(this.estimatedRowSize, size) ? this : new LimitExec(source(), child(), limit, groupings, size); + return Objects.equals(this.estimatedRowSize, size) ? this : new LimitExec(source(), child(), limit, size); } @Override public int hashCode() { - return Objects.hash(limit, groupings, estimatedRowSize, child()); + return Objects.hash(limit, estimatedRowSize, child()); } @Override @@ -133,7 +110,6 @@ public boolean equals(Object obj) { LimitExec other = (LimitExec) obj; return Objects.equals(limit, other.limit) - && Objects.equals(groupings, other.groupings) && Objects.equals(estimatedRowSize, other.estimatedRowSize) && Objects.equals(child(), other.child()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 86fa32173f198..9f33f2060d3bf 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -38,7 +38,6 @@ import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.compute.operator.EvalOperator.EvalOperatorFactory; import org.elasticsearch.compute.operator.FilterOperator.FilterOperatorFactory; -import org.elasticsearch.compute.operator.GroupedLimitOperator; import org.elasticsearch.compute.operator.LimitOperator; import org.elasticsearch.compute.operator.LocalSourceOperator; import org.elasticsearch.compute.operator.LocalSourceOperator.LocalSourceFactory; @@ -609,14 +608,6 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte ); } - private static int getAttributeChannel(Expression expression, Layout layout, String errMessage) { - if (expression instanceof Attribute a) { - return layout.get(a.id()).channel(); - } else { - throw new EsqlIllegalArgumentException(errMessage); - } - } - private static MappedFieldType.FieldExtractPreference fieldExtractPreference(TopNExec topNExec, Set nameIds) { MappedFieldType.FieldExtractPreference fieldExtractPreference = MappedFieldType.FieldExtractPreference.NONE; // See if any of the NameIds is marked as having been loaded with doc-values preferences, which will affect the ElementType chosen. @@ -1390,21 +1381,7 @@ private PhysicalOperation planFilter(FilterExec filter, LocalExecutionPlannerCon private PhysicalOperation planLimit(LimitExec limit, LocalExecutionPlannerContext context) { PhysicalOperation source = plan(limit.child(), context); - int limitValue = (Integer) limit.limit().fold(context.foldCtx); - if (limit.groupings().isEmpty()) { - return source.with(new LimitOperator.Factory(limitValue), source.layout); - } - Layout layout = source.layout; - List groupKeys = limit.groupings() - .stream() - .map(g -> getAttributeChannel(g, layout, "expression in LIMIT BY must be an attribute")) - .toList(); - List inverse = layout.inverse(); - List elementTypes = new ArrayList<>(layout.numberOfChannels()); - for (int channel = 0; channel < inverse.size(); channel++) { - elementTypes.add(PlannerUtils.toElementType(inverse.get(channel).type())); - } - return source.with(new GroupedLimitOperator.Factory(limitValue, groupKeys, elementTypes), source.layout); + return source.with(new LimitOperator.Factory((Integer) limit.limit().fold(context.foldCtx)), source.layout); } private PhysicalOperation planMvExpand(MvExpandExec mvExpandExec, LocalExecutionPlannerContext context) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java index 873084feac0cf..934c2f0ff02f8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java @@ -88,7 +88,7 @@ private PhysicalPlan mapUnary(UnaryPlan unary) { } if (unary instanceof Limit limit) { - return new LimitExec(limit.source(), mappedChild, limit.limit(), limit.groupings(), null); + return new LimitExec(limit.source(), mappedChild, limit.limit(), null); } if (unary instanceof TopN topN) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java index 04efc8d028bde..50da38255bf4c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java @@ -139,7 +139,7 @@ else if (aggregate.groupings() if (unary instanceof Limit limit) { mappedChild = addExchangeForFragment(limit, mappedChild); - return new LimitExec(limit.source(), mappedChild, limit.limit(), limit.groupings(), null); + return new LimitExec(limit.source(), mappedChild, limit.limit(), null); } if (unary instanceof TopN topN) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index 077b243a25102..ccfbf68763562 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -28,7 +28,6 @@ import org.elasticsearch.compute.operator.AggregationOperator; import org.elasticsearch.compute.operator.AsyncOperator; import org.elasticsearch.compute.operator.DriverStatus; -import org.elasticsearch.compute.operator.GroupedLimitOperator; import org.elasticsearch.compute.operator.HashAggregationOperator; import org.elasticsearch.compute.operator.LimitOperator; import org.elasticsearch.compute.operator.MMROperator; @@ -393,7 +392,6 @@ public List getNamedWriteables() { entries.add(org.elasticsearch.xpack.esql.datasources.AsyncExternalSourceOperator.Status.ENTRY); entries.add(HashAggregationOperator.Status.ENTRY); entries.add(LimitOperator.Status.ENTRY); - entries.add(GroupedLimitOperator.Status.ENTRY); entries.add(LuceneOperator.Status.ENTRY); entries.add(TopNOperatorStatus.ENTRY); entries.add(MvExpandOperator.Status.ENTRY); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java index 2349080d750cb..d762416db64d8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/logical/LimitSerializationTests.java @@ -8,13 +8,11 @@ package org.elasticsearch.xpack.esql.plan.logical; import org.elasticsearch.TransportVersion; -import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; import java.io.IOException; -import java.util.List; public class LimitSerializationTests extends AbstractLogicalPlanSerializationTests { @Override @@ -22,30 +20,23 @@ protected Limit createTestInstance() { Source source = randomSource(); Expression limit = FieldAttributeTests.createFieldAttribute(0, false); LogicalPlan child = randomChild(0); - List groupings = randomGroupings(); - return new Limit(source, limit, child, groupings, randomBoolean(), randomBoolean()); + return new Limit(source, limit, child, randomBoolean(), randomBoolean()); } @Override protected Limit mutateInstance(Limit instance) throws IOException { Expression limit = instance.limit(); LogicalPlan child = instance.child(); - List groupings = instance.groupings(); boolean duplicated = instance.duplicated(); boolean local = instance.local(); - switch (randomIntBetween(0, 4)) { + switch (randomIntBetween(0, 3)) { case 0 -> limit = randomValueOtherThan(limit, () -> FieldAttributeTests.createFieldAttribute(0, false)); case 1 -> child = randomValueOtherThan(child, () -> randomChild(0)); - case 2 -> groupings = randomValueOtherThan(groupings, LimitSerializationTests::randomGroupings); - case 3 -> duplicated = duplicated == false; - case 4 -> local = local == false; + case 2 -> duplicated = duplicated == false; + case 3 -> local = local == false; default -> throw new IllegalStateException("Should never reach here"); } - return new Limit(instance.source(), limit, child, groupings, duplicated, local); - } - - private static List randomGroupings() { - return randomList(0, 3, () -> FieldAttributeTests.createFieldAttribute(0, false)); + return new Limit(instance.source(), limit, child, duplicated, local); } @Override @@ -61,25 +52,4 @@ protected Limit copyInstance(Limit instance, TransportVersion version) throws IO Limit deserializedCopy = super.copyInstance(instance, version); return deserializedCopy.withDuplicated(instance.duplicated()).withLocal(instance.local()); } - - public void testSerializationWithGroupingsOnOldVersion() { - TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(Limit.ESQL_LIMIT_BY); - Source source = randomSource(); - Expression limit = FieldAttributeTests.createFieldAttribute(0, false); - LogicalPlan child = randomChild(0); - List groupings = randomList(1, 3, () -> FieldAttributeTests.createFieldAttribute(0, false)); - Limit instance = new Limit(source, limit, child, groupings, false, false); - Exception e = expectThrows(IllegalArgumentException.class, () -> copyInstance(instance, oldVersion)); - assertEquals("LIMIT BY is not supported by all nodes in the cluster", e.getMessage()); - } - - public void testSerializationWithoutGroupingsOnOldVersion() throws IOException { - TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(Limit.ESQL_LIMIT_BY); - Source source = randomSource(); - Expression limit = FieldAttributeTests.createFieldAttribute(0, false); - LogicalPlan child = randomChild(0); - Limit instance = new Limit(source, limit, child, List.of(), false, false); - Limit deserialized = copyInstance(instance, oldVersion); - assertEquals(List.of(), deserialized.groupings()); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/LimitExecSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/LimitExecSerializationTests.java index 9ddf782067152..133f420fc3fc2 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/LimitExecSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plan/physical/LimitExecSerializationTests.java @@ -7,34 +7,25 @@ package org.elasticsearch.xpack.esql.plan.physical; -import org.elasticsearch.TransportVersion; -import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.expression.function.FieldAttributeTests; -import org.elasticsearch.xpack.esql.plan.logical.Limit; import java.io.IOException; -import java.util.List; public class LimitExecSerializationTests extends AbstractPhysicalPlanSerializationTests { public static LimitExec randomLimitExec(int depth) { Source source = randomSource(); PhysicalPlan child = randomChild(depth); Expression limit = randomLimit(); - return new LimitExec(source, child, limit, randomGroupings(), randomEstimatedRowSize()); + return new LimitExec(source, child, limit, randomEstimatedRowSize()); } private static Expression randomLimit() { return new Literal(randomSource(), between(0, Integer.MAX_VALUE), DataType.INTEGER); } - private static List randomGroupings() { - return randomList(0, 3, () -> FieldAttributeTests.createFieldAttribute(0, false)); - } - @Override protected LimitExec createTestInstance() { return randomLimitExec(0); @@ -44,41 +35,18 @@ protected LimitExec createTestInstance() { protected LimitExec mutateInstance(LimitExec instance) throws IOException { PhysicalPlan child = instance.child(); Expression limit = instance.limit(); - List groupings = instance.groupings(); Integer estimatedRowSize = instance.estimatedRowSize(); - switch (between(0, 3)) { + switch (between(0, 2)) { case 0 -> child = randomValueOtherThan(child, () -> randomChild(0)); case 1 -> limit = randomValueOtherThan(limit, LimitExecSerializationTests::randomLimit); - case 2 -> groupings = randomValueOtherThan(groupings, LimitExecSerializationTests::randomGroupings); - case 3 -> estimatedRowSize = randomValueOtherThan(estimatedRowSize, LimitExecSerializationTests::randomEstimatedRowSize); + case 2 -> estimatedRowSize = randomValueOtherThan(estimatedRowSize, LimitExecSerializationTests::randomEstimatedRowSize); default -> throw new AssertionError("Unexpected case"); } - return new LimitExec(instance.source(), child, limit, groupings, estimatedRowSize); + return new LimitExec(instance.source(), child, limit, estimatedRowSize); } @Override protected boolean alwaysEmptySource() { return true; } - - public void testSerializationWithGroupingsOnOldVersion() { - TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(Limit.ESQL_LIMIT_BY); - Source source = randomSource(); - PhysicalPlan child = randomChild(0); - Expression limit = randomLimit(); - List groupings = randomList(1, 3, () -> FieldAttributeTests.createFieldAttribute(0, false)); - LimitExec instance = new LimitExec(source, child, limit, groupings, randomEstimatedRowSize()); - Exception e = expectThrows(IllegalArgumentException.class, () -> copyInstance(instance, oldVersion)); - assertEquals("LIMIT BY is not supported by all nodes in the cluster", e.getMessage()); - } - - public void testSerializationWithoutGroupingsOnOldVersion() throws IOException { - TransportVersion oldVersion = TransportVersionUtils.getPreviousVersion(Limit.ESQL_LIMIT_BY); - Source source = randomSource(); - PhysicalPlan child = randomChild(0); - Expression limit = randomLimit(); - LimitExec instance = new LimitExec(source, child, limit, randomEstimatedRowSize()); - LimitExec deserialized = copyInstance(instance, oldVersion); - assertEquals(List.of(), deserialized.groupings()); - } } From e0d2c30047b304ff39180c400d0afdc939e510de Mon Sep 17 00:00:00 2001 From: ncordon Date: Tue, 3 Mar 2026 17:56:41 +0100 Subject: [PATCH 07/14] Rename --- ...onKeyEncoder.java => GroupKeyEncoder.java} | 4 +-- .../operator/GroupedLimitOperator.java | 10 +++---- ...erTests.java => GroupKeyEncoderTests.java} | 26 +++++++++---------- .../operator/GroupedLimitOperatorTests.java | 10 +++---- 4 files changed, 23 insertions(+), 27 deletions(-) rename x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/{PositionKeyEncoder.java => GroupKeyEncoder.java} (97%) rename x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/{PositionKeyEncoderTests.java => GroupKeyEncoderTests.java} (85%) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/PositionKeyEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java similarity index 97% rename from x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/PositionKeyEncoder.java rename to x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java index 3bb89441e7103..9bf27ee4d6a32 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/PositionKeyEncoder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java @@ -27,14 +27,14 @@ * in block iteration order. This means {@code [1, 2]} and {@code [2, 1]} produce different keys. * Null positions are encoded as a value count of zero. */ -public class PositionKeyEncoder { +public class GroupKeyEncoder { private final int[] groupChannels; private final ElementType[] elementTypes; private final BytesRefBuilder scratch = new BytesRefBuilder(); private final BytesRef scratchBytesRef = new BytesRef(); - public PositionKeyEncoder(int[] groupChannels, List elementTypes) { + public GroupKeyEncoder(int[] groupChannels, List elementTypes) { this.groupChannels = groupChannels; this.elementTypes = new ElementType[groupChannels.length]; for (int i = 0; i < groupChannels.length; i++) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java index aa4b9f6083018..67f6411d85150 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -36,7 +36,7 @@ public class GroupedLimitOperator implements Operator { private final int limitPerGroup; - private final PositionKeyEncoder keyEncoder; + private final GroupKeyEncoder keyEncoder; private final BigArrays bigArrays; private final BytesRefHashTable seenKeys; private IntArray counts; @@ -48,7 +48,7 @@ public class GroupedLimitOperator implements Operator { private Page lastOutput; private boolean finished; - public GroupedLimitOperator(int limitPerGroup, PositionKeyEncoder keyEncoder, BlockFactory blockFactory) { + public GroupedLimitOperator(int limitPerGroup, GroupKeyEncoder keyEncoder, BlockFactory blockFactory) { this.limitPerGroup = limitPerGroup; this.keyEncoder = keyEncoder; this.bigArrays = blockFactory.bigArrays(); @@ -69,11 +69,7 @@ public Factory(int limitPerGroup, List groupChannels, List @Override public GroupedLimitOperator get(DriverContext driverContext) { - return new GroupedLimitOperator( - limitPerGroup, - new PositionKeyEncoder(groupChannels, elementTypes), - driverContext.blockFactory() - ); + return new GroupedLimitOperator(limitPerGroup, new GroupKeyEncoder(groupChannels, elementTypes), driverContext.blockFactory()); } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupKeyEncoderTests.java similarity index 85% rename from x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java rename to x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupKeyEncoderTests.java index f504e931ca36c..45e5c1f3c0d84 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/PositionKeyEncoderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupKeyEncoderTests.java @@ -15,13 +15,13 @@ import java.util.List; -public class PositionKeyEncoderTests extends ComputeTestCase { +public class GroupKeyEncoderTests extends ComputeTestCase { public void testSameIntValuesSameKey() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newIntArrayVector(new int[] { 7, 7 }, 2).asBlock()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.INT)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.INT)); assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -32,7 +32,7 @@ public void testDifferentIntValuesDifferentKeys() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newIntArrayVector(new int[] { 1, 2 }, 2).asBlock()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.INT)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.INT)); assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -43,7 +43,7 @@ public void testLongType() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newLongArrayVector(new long[] { 100, 200, 100 }, 3).asBlock()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -58,7 +58,7 @@ public void testDoubleType() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newDoubleArrayVector(new double[] { 1.5, 2.5, 1.5 }, 3).asBlock()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.DOUBLE)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.DOUBLE)); BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -73,7 +73,7 @@ public void testFloatType() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newFloatArrayVector(new float[] { 1.5f, 2.5f, 1.5f }, 3).asBlock()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.FLOAT)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.FLOAT)); BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -92,7 +92,7 @@ public void testBooleanType() { builder.appendBoolean(true); Page page = new Page(builder.build()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.BOOLEAN)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.BOOLEAN)); BytesRef keyTrue = copy(encoder.encode(page, 0)); BytesRef keyFalse = copy(encoder.encode(page, 1)); BytesRef keyTrue2 = copy(encoder.encode(page, 2)); @@ -112,7 +112,7 @@ public void testBytesRefType() { builder.appendBytesRef(new BytesRef("hello")); Page page = new Page(builder.build()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.BYTES_REF)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.BYTES_REF)); BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -135,7 +135,7 @@ public void testNullProducesDistinctKey() { builder.appendLong(0); Page page = new Page(builder.build()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -160,7 +160,7 @@ public void testMultivalueOrderMatters() { builder.endPositionEntry(); Page page = new Page(builder.build()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -185,7 +185,7 @@ public void testMultivalueSameOrderSameKey() { builder.endPositionEntry(); Page page = new Page(builder.build()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -207,7 +207,7 @@ public void testCompositeKey() { bytesBuilder.appendBytesRef(new BytesRef("a")); Page page = new Page(longBuilder.build(), bytesBuilder.build()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.BYTES_REF)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.BYTES_REF)); BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -233,7 +233,7 @@ public void testSingleValueMatchesOneElementMultivalue() { builder.endPositionEntry(); Page page = new Page(builder.build()); try { - PositionKeyEncoder encoder = new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java index 3ca37518f80f6..8bd87b0acc509 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java @@ -107,7 +107,7 @@ public void testLimitPerGroupSinglePage() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 2, - new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -137,7 +137,7 @@ public void testLimitPerGroupAcrossPages() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 2, - new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -173,7 +173,7 @@ public void testEntirePageFiltered() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 1, - new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -198,7 +198,7 @@ public void testPagePassesThroughWhenAllAccepted() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 10, - new PositionKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -223,7 +223,7 @@ public void testMultipleGroupChannels() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 1, - new PositionKeyEncoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.LONG)), + new GroupKeyEncoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.LONG)), blockFactory ) ) { From 10b97f40f33e87612e1f29802a7d000e6b385779 Mon Sep 17 00:00:00 2001 From: ncordon Date: Tue, 3 Mar 2026 19:27:47 +0100 Subject: [PATCH 08/14] Adds code for the edge case limitPerGroup = 0 --- .../operator/GroupedLimitOperator.java | 9 +++++- .../operator/GroupedLimitOperatorTests.java | 30 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java index 67f6411d85150..176aee097940d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -89,6 +89,11 @@ public void addInput(Page page) { int positionCount = page.getPositionCount(); rowsReceived += positionCount; + if (limitPerGroup == 0) { + page.releaseBlocks(); + return; + } + int acceptedCount = 0; int[] accepted = new int[positionCount]; @@ -96,11 +101,13 @@ public void addInput(Page page) { for (int pos = 0; pos < positionCount; pos++) { BytesRef key = keyEncoder.encode(page, pos); long hashOrd = seenKeys.add(key); - int count = 0; + int count; long ord; if (hashOrd >= 0) { ord = hashOrd; counts = bigArrays.grow(counts, ord + 1); + count = 0; + counts.set(ord, 0); } else { ord = -(hashOrd + 1); count = counts.get(ord); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java index 8bd87b0acc509..73864ece8c9cd 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java @@ -213,6 +213,36 @@ public void testPagePassesThroughWhenAllAccepted() { } } + /** + * With limitPerGroup=0 every row is dropped regardless of the group key. + * No keys should be tracked and status must reflect zero emitted rows. + */ + public void testLimitPerGroupZero() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 0, + new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + Page p1 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 3 }, 3).asBlock()); + op.addInput(p1); + assertThat(op.getOutput(), nullValue()); + + Page p2 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 1, 2 }, 3).asBlock()); + op.addInput(p2); + assertThat(op.getOutput(), nullValue()); + + GroupedLimitOperator.Status status = op.status(); + assertThat(status.rowsReceived(), equalTo(6L)); + assertThat(status.rowsEmitted(), equalTo(0L)); + assertThat(status.pagesProcessed(), equalTo(0)); + assertThat(status.groupCount(), equalTo(0)); + } + } + /** * Groups are determined by composite keys across multiple channels. * (1,10), (1,20), and (2,10) are three distinct groups. From 3fbeb25b52239c414ae4ba908ad050f06fa5f76b Mon Sep 17 00:00:00 2001 From: ncordon Date: Tue, 3 Mar 2026 19:47:32 +0100 Subject: [PATCH 09/14] Adds some explanatory comments --- .../compute/operator/GroupKeyEncoder.java | 16 ++++++++++++++++ .../compute/operator/GroupedLimitOperator.java | 13 ++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java index 9bf27ee4d6a32..989e5c8056a10 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java @@ -109,6 +109,13 @@ private void encodeBlock(Block block, ElementType type, int position) { } } + /** + * Appends a non-negative int using variable-length encoding (1–5 bytes). + * Each byte stores 7 data bits in the low bits; the high bit is set to 1 + * to indicate that more bytes follow, and 0 for the final byte. Values + * below 128 are encoded in a single byte, making this compact for the + * small numbers typical of value counts and byte-array lengths. + */ private void writeVInt(int value) { while ((value & ~0x7F) != 0) { scratch.append((byte) ((value & 0x7F) | 0x80)); @@ -117,6 +124,11 @@ private void writeVInt(int value) { scratch.append((byte) value); } + /** + * Appends an int as exactly 4 bytes in big-endian (most-significant byte + * first) order. Fixed-width encoding preserves the natural sort order of + * the original values and avoids the need for a length prefix. + */ private void writeInt(int value) { scratch.append((byte) (value >> 24)); scratch.append((byte) (value >> 16)); @@ -124,6 +136,10 @@ private void writeInt(int value) { scratch.append((byte) value); } + /** + * Appends a long as exactly 8 bytes in big-endian order by writing the + * upper 32 bits followed by the lower 32 bits, each via {@link #writeInt}. + */ private void writeLong(long value) { writeInt((int) (value >> 32)); writeInt((int) value); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java index 176aee097940d..09972e810e356 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -37,8 +37,19 @@ public class GroupedLimitOperator implements Operator { private final int limitPerGroup; private final GroupKeyEncoder keyEncoder; - private final BigArrays bigArrays; + /* + BlockHash unrolls multivalues (i.e. for each unique element of a multivalue [1,2,2,1] it would create + a new key). + + In contrast, in this operator we consider every multivalue as a key. Two multivalues are equal if they hold + the same values in the exact same order. That's why we need to use: + + - a ref hash table. Every time we add a new key it gives a monotonically increasing integer that we will + use to index in the counts array, Ordinal(key)) + - a counts array to keep track of the number of times we have seen each Ordinal(key) + */ private final BytesRefHashTable seenKeys; + private final BigArrays bigArrays; private IntArray counts; private int pagesProcessed; From cb5bf9a8d2f39000666b44f8a07de2bf3f913ebe Mon Sep 17 00:00:00 2001 From: ncordon Date: Wed, 4 Mar 2026 16:27:50 +0100 Subject: [PATCH 10/14] Uses javadoc instead --- .../operator/GroupedLimitOperator.java | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java index 09972e810e356..ca25ce0039b66 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -37,16 +37,17 @@ public class GroupedLimitOperator implements Operator { private final int limitPerGroup; private final GroupKeyEncoder keyEncoder; - /* - BlockHash unrolls multivalues (i.e. for each unique element of a multivalue [1,2,2,1] it would create - a new key). - - In contrast, in this operator we consider every multivalue as a key. Two multivalues are equal if they hold - the same values in the exact same order. That's why we need to use: - - - a ref hash table. Every time we add a new key it gives a monotonically increasing integer that we will - use to index in the counts array, Ordinal(key)) - - a counts array to keep track of the number of times we have seen each Ordinal(key) + /** + * BlockHash unrolls multivalues (i.e. for each unique element of a multivalue [1,2,2,1] it would create + * a new key). + * + *

In contrast, in this operator we consider every multivalue as a key. Two multivalues are equal if they hold + * the same values in the exact same order. That's why we need to use: + *

    + *
  • a ref hash table. Every time we add a new key it gives a monotonically increasing integer that we will + * use to index in the counts array, {@code Ordinal(key)}
  • + *
  • a counts array to keep track of the number of times we have seen each {@code Ordinal(key)}
  • + *
*/ private final BytesRefHashTable seenKeys; private final BigArrays bigArrays; From d2bd9f374858adb7db07226d69f6a2e457ae2554 Mon Sep 17 00:00:00 2001 From: ncordon Date: Wed, 4 Mar 2026 18:33:00 +0100 Subject: [PATCH 11/14] Addresses pr review feedback --- .../compute/operator/GroupKeyEncoder.java | 69 +++----- .../operator/GroupedLimitOperator.java | 82 +++++----- .../operator/GroupKeyEncoderTests.java | 42 +++-- .../operator/GroupedLimitOperatorTests.java | 147 +++++++++++++++++- .../xpack/esql/plugin/EsqlPlugin.java | 2 + 5 files changed, 218 insertions(+), 124 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java index 989e5c8056a10..121c098f61aff 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupKeyEncoder.java @@ -8,7 +8,6 @@ package org.elasticsearch.compute.operator; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.BytesRefBlock; @@ -18,6 +17,9 @@ import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.topn.DefaultUnsortableTopNEncoder; +import org.elasticsearch.compute.operator.topn.TopNEncoder; +import org.elasticsearch.core.Releasable; import java.util.List; @@ -27,19 +29,22 @@ * in block iteration order. This means {@code [1, 2]} and {@code [2, 1]} produce different keys. * Null positions are encoded as a value count of zero. */ -public class GroupKeyEncoder { +public class GroupKeyEncoder implements Releasable { + + private static final DefaultUnsortableTopNEncoder encoder = TopNEncoder.DEFAULT_UNSORTABLE; private final int[] groupChannels; private final ElementType[] elementTypes; - private final BytesRefBuilder scratch = new BytesRefBuilder(); + private final BreakingBytesRefBuilder scratch; private final BytesRef scratchBytesRef = new BytesRef(); - public GroupKeyEncoder(int[] groupChannels, List elementTypes) { + public GroupKeyEncoder(int[] groupChannels, List elementTypes, BreakingBytesRefBuilder scratch) { this.groupChannels = groupChannels; this.elementTypes = new ElementType[groupChannels.length]; for (int i = 0; i < groupChannels.length; i++) { this.elementTypes[i] = elementTypes.get(groupChannels[i]); } + this.scratch = scratch; } /** @@ -52,54 +57,53 @@ public BytesRef encode(Page page, int position) { Block block = page.getBlock(groupChannels[i]); encodeBlock(block, elementTypes[i], position); } - return scratch.get(); + return scratch.bytesRefView(); } private void encodeBlock(Block block, ElementType type, int position) { if (block.isNull(position)) { - writeVInt(0); + encoder.encodeVInt(0, scratch); return; } int firstValueIndex = block.getFirstValueIndex(position); int valueCount = block.getValueCount(position); - writeVInt(valueCount); + encoder.encodeVInt(valueCount, scratch); switch (type) { case INT -> { IntBlock b = (IntBlock) block; for (int v = 0; v < valueCount; v++) { - writeInt(b.getInt(firstValueIndex + v)); + encoder.encodeInt(b.getInt(firstValueIndex + v), scratch); } } case LONG -> { LongBlock b = (LongBlock) block; for (int v = 0; v < valueCount; v++) { - writeLong(b.getLong(firstValueIndex + v)); + encoder.encodeLong(b.getLong(firstValueIndex + v), scratch); } } case DOUBLE -> { DoubleBlock b = (DoubleBlock) block; for (int v = 0; v < valueCount; v++) { - writeLong(Double.doubleToLongBits(b.getDouble(firstValueIndex + v))); + encoder.encodeDouble(b.getDouble(firstValueIndex + v), scratch); } } case FLOAT -> { FloatBlock b = (FloatBlock) block; for (int v = 0; v < valueCount; v++) { - writeInt(Float.floatToIntBits(b.getFloat(firstValueIndex + v))); + encoder.encodeFloat(b.getFloat(firstValueIndex + v), scratch); } } case BOOLEAN -> { BooleanBlock b = (BooleanBlock) block; for (int v = 0; v < valueCount; v++) { - scratch.append((byte) (b.getBoolean(firstValueIndex + v) ? 1 : 0)); + encoder.encodeBoolean(b.getBoolean(firstValueIndex + v), scratch); } } case BYTES_REF -> { BytesRefBlock b = (BytesRefBlock) block; for (int v = 0; v < valueCount; v++) { BytesRef ref = b.getBytesRef(firstValueIndex + v, scratchBytesRef); - writeVInt(ref.length); - scratch.append(ref.bytes, ref.offset, ref.length); + encoder.encodeBytesRef(ref, scratch); } } case NULL -> { @@ -109,39 +113,8 @@ private void encodeBlock(Block block, ElementType type, int position) { } } - /** - * Appends a non-negative int using variable-length encoding (1–5 bytes). - * Each byte stores 7 data bits in the low bits; the high bit is set to 1 - * to indicate that more bytes follow, and 0 for the final byte. Values - * below 128 are encoded in a single byte, making this compact for the - * small numbers typical of value counts and byte-array lengths. - */ - private void writeVInt(int value) { - while ((value & ~0x7F) != 0) { - scratch.append((byte) ((value & 0x7F) | 0x80)); - value >>>= 7; - } - scratch.append((byte) value); - } - - /** - * Appends an int as exactly 4 bytes in big-endian (most-significant byte - * first) order. Fixed-width encoding preserves the natural sort order of - * the original values and avoids the need for a length prefix. - */ - private void writeInt(int value) { - scratch.append((byte) (value >> 24)); - scratch.append((byte) (value >> 16)); - scratch.append((byte) (value >> 8)); - scratch.append((byte) value); - } - - /** - * Appends a long as exactly 8 bytes in big-endian order by writing the - * upper 32 bits followed by the lower 32 bits, each via {@link #writeInt}. - */ - private void writeLong(long value) { - writeInt((int) (value >> 32)); - writeInt((int) value); + @Override + public void close() { + scratch.close(); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java index ca25ce0039b66..59a710d71f2bc 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -17,7 +17,6 @@ import org.elasticsearch.common.util.BytesRefHashTable; import org.elasticsearch.common.util.IntArray; import org.elasticsearch.compute.aggregation.blockhash.HashImplFactory; -import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.Page; @@ -34,6 +33,29 @@ * Group keys use list semantics for multivalues: {@code [1,2]} and {@code [2,1]} are different groups. */ public class GroupedLimitOperator implements Operator { + public static final class Factory implements Operator.OperatorFactory { + private final int limitPerGroup; + private final int[] groupChannels; + private final List elementTypes; + + public Factory(int limitPerGroup, List groupChannels, List elementTypes) { + this.limitPerGroup = limitPerGroup; + this.groupChannels = groupChannels.stream().mapToInt(Integer::intValue).toArray(); + this.elementTypes = elementTypes; + } + + @Override + public GroupedLimitOperator get(DriverContext driverContext) { + BlockFactory blockFactory = driverContext.blockFactory(); + var scratch = new BreakingBytesRefBuilder(blockFactory.breaker(), "group-key-encoder"); + return new GroupedLimitOperator(limitPerGroup, new GroupKeyEncoder(groupChannels, elementTypes, scratch), blockFactory); + } + + @Override + public String describe() { + return "GroupedLimitOperator[limit = " + limitPerGroup + "]"; + } + } private final int limitPerGroup; private final GroupKeyEncoder keyEncoder; @@ -61,32 +83,18 @@ public class GroupedLimitOperator implements Operator { private boolean finished; public GroupedLimitOperator(int limitPerGroup, GroupKeyEncoder keyEncoder, BlockFactory blockFactory) { - this.limitPerGroup = limitPerGroup; - this.keyEncoder = keyEncoder; - this.bigArrays = blockFactory.bigArrays(); - this.seenKeys = HashImplFactory.newBytesRefHash(blockFactory); - this.counts = bigArrays.newIntArray(16, false); - } - - public static final class Factory implements Operator.OperatorFactory { - private final int limitPerGroup; - private final int[] groupChannels; - private final List elementTypes; - - public Factory(int limitPerGroup, List groupChannels, List elementTypes) { + boolean success = false; + try { this.limitPerGroup = limitPerGroup; - this.groupChannels = groupChannels.stream().mapToInt(Integer::intValue).toArray(); - this.elementTypes = elementTypes; - } - - @Override - public GroupedLimitOperator get(DriverContext driverContext) { - return new GroupedLimitOperator(limitPerGroup, new GroupKeyEncoder(groupChannels, elementTypes), driverContext.blockFactory()); - } - - @Override - public String describe() { - return "GroupedLimitOperator[limit = " + limitPerGroup + "]"; + this.keyEncoder = keyEncoder; + this.bigArrays = blockFactory.bigArrays(); + this.seenKeys = HashImplFactory.newBytesRefHash(blockFactory); + this.counts = bigArrays.newIntArray(16, false); + success = true; + } finally { + if (success == false) { + keyEncoder.close(); + } } } @@ -148,26 +156,8 @@ public void addInput(Page page) { } else { int[] positions = new int[acceptedCount]; System.arraycopy(accepted, 0, positions, 0, acceptedCount); - lastOutput = filterPage(page, positions); - } - } - - private static Page filterPage(Page page, int[] positions) { - Block[] blocks = new Block[page.getBlockCount()]; - Page result = null; - try { - for (int b = 0; b < blocks.length; b++) { - blocks[b] = page.getBlock(b).filter(false, positions); - } - result = new Page(blocks); - } finally { - if (result == null) { - Releasables.closeExpectNoException(page::releaseBlocks, Releasables.wrap(blocks)); - } else { - page.releaseBlocks(); - } + lastOutput = page.filter(false, positions); } - return result; } @Override @@ -204,7 +194,7 @@ public Status status() { @Override public void close() { - Releasables.closeExpectNoException(lastOutput == null ? () -> {} : lastOutput::releaseBlocks, seenKeys, counts); + Releasables.closeExpectNoException(lastOutput == null ? () -> {} : lastOutput::releaseBlocks, seenKeys, counts, keyEncoder); } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupKeyEncoderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupKeyEncoderTests.java index 45e5c1f3c0d84..6c0097d4a1915 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupKeyEncoderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupKeyEncoderTests.java @@ -17,11 +17,16 @@ public class GroupKeyEncoderTests extends ComputeTestCase { + private GroupKeyEncoder encoder(int[] groupChannels, List elementTypes) { + BlockFactory bf = blockFactory(); + var scratch = new BreakingBytesRefBuilder(bf.breaker(), "group-key-encoder-test"); + return new GroupKeyEncoder(groupChannels, elementTypes, scratch); + } + public void testSameIntValuesSameKey() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newIntArrayVector(new int[] { 7, 7 }, 2).asBlock()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.INT)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.INT))) { assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -31,8 +36,7 @@ public void testSameIntValuesSameKey() { public void testDifferentIntValuesDifferentKeys() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newIntArrayVector(new int[] { 1, 2 }, 2).asBlock()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.INT)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.INT))) { assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -42,8 +46,7 @@ public void testDifferentIntValuesDifferentKeys() { public void testLongType() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newLongArrayVector(new long[] { 100, 200, 100 }, 3).asBlock()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.LONG))) { BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -57,8 +60,7 @@ public void testLongType() { public void testDoubleType() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newDoubleArrayVector(new double[] { 1.5, 2.5, 1.5 }, 3).asBlock()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.DOUBLE)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.DOUBLE))) { BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -72,8 +74,7 @@ public void testDoubleType() { public void testFloatType() { BlockFactory bf = blockFactory(); Page page = new Page(bf.newFloatArrayVector(new float[] { 1.5f, 2.5f, 1.5f }, 3).asBlock()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.FLOAT)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.FLOAT))) { BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -91,8 +92,7 @@ public void testBooleanType() { builder.appendBoolean(false); builder.appendBoolean(true); Page page = new Page(builder.build()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.BOOLEAN)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.BOOLEAN))) { BytesRef keyTrue = copy(encoder.encode(page, 0)); BytesRef keyFalse = copy(encoder.encode(page, 1)); BytesRef keyTrue2 = copy(encoder.encode(page, 2)); @@ -111,8 +111,7 @@ public void testBytesRefType() { builder.appendBytesRef(new BytesRef("world")); builder.appendBytesRef(new BytesRef("hello")); Page page = new Page(builder.build()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.BYTES_REF)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.BYTES_REF))) { BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -134,8 +133,7 @@ public void testNullProducesDistinctKey() { builder.appendNull(); builder.appendLong(0); Page page = new Page(builder.build()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.LONG))) { assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -159,8 +157,7 @@ public void testMultivalueOrderMatters() { builder.appendLong(1); builder.endPositionEntry(); Page page = new Page(builder.build()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.LONG))) { assertNotEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -184,8 +181,7 @@ public void testMultivalueSameOrderSameKey() { builder.appendLong(2); builder.endPositionEntry(); Page page = new Page(builder.build()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.LONG))) { assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); @@ -206,8 +202,7 @@ public void testCompositeKey() { bytesBuilder.appendBytesRef(new BytesRef("b")); bytesBuilder.appendBytesRef(new BytesRef("a")); Page page = new Page(longBuilder.build(), bytesBuilder.build()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.BYTES_REF)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.BYTES_REF))) { BytesRef key0 = copy(encoder.encode(page, 0)); BytesRef key1 = copy(encoder.encode(page, 1)); BytesRef key2 = copy(encoder.encode(page, 2)); @@ -232,8 +227,7 @@ public void testSingleValueMatchesOneElementMultivalue() { builder.appendLong(42); builder.endPositionEntry(); Page page = new Page(builder.build()); - try { - GroupKeyEncoder encoder = new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)); + try (GroupKeyEncoder encoder = encoder(new int[] { 0 }, List.of(ElementType.LONG))) { assertEquals(copy(encoder.encode(page, 0)), copy(encoder.encode(page, 1))); } finally { page.releaseBlocks(); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java index 73864ece8c9cd..e05dc84f6cbc6 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java @@ -107,7 +107,7 @@ public void testLimitPerGroupSinglePage() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 2, - new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -137,7 +137,7 @@ public void testLimitPerGroupAcrossPages() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 2, - new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -173,7 +173,7 @@ public void testEntirePageFiltered() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 1, - new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -198,7 +198,7 @@ public void testPagePassesThroughWhenAllAccepted() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 10, - new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -223,7 +223,7 @@ public void testLimitPerGroupZero() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 0, - new GroupKeyEncoder(new int[] { 0 }, List.of(ElementType.LONG)), + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), blockFactory ) ) { @@ -253,7 +253,7 @@ public void testMultipleGroupChannels() { try ( GroupedLimitOperator op = new GroupedLimitOperator( 1, - new GroupKeyEncoder(new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.LONG)), + groupKeyEncoder(blockFactory, new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.LONG)), blockFactory ) ) { @@ -279,6 +279,141 @@ public void testMultipleGroupChannels() { } } + /** + * A multivalue {@code [1,2]} and a single value {@code 1} are different groups. + */ + public void testMultivalueAndSingleValueAreDifferentGroups() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + builder.appendLong(1); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(2)); + } finally { + output.releaseBlocks(); + } + } + } + } + + /** + * A null position and a multivalue {@code [1,2]} are different groups. + */ + public void testNullAndMultivalueAreDifferentGroups() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.appendNull(); + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(2)); + } finally { + output.releaseBlocks(); + } + } + } + } + + /** + * Two identical multivalues {@code [1,2]} belong to the same group. + */ + public void testIdenticalMultivaluesAreSameGroup() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(1)); + } finally { + output.releaseBlocks(); + } + } + } + } + + /** + * {@code [1,2]} and {@code [2,1]} are different groups because + * multivalues use order-sensitive list semantics. + */ + public void testMultivaluesWithDifferentOrderAreDifferentGroups() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendLong(2); + builder.appendLong(1); + builder.endPositionEntry(); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(2)); + } finally { + output.releaseBlocks(); + } + } + } + } + + private static GroupKeyEncoder groupKeyEncoder(BlockFactory blockFactory, int[] groupChannels, List elementTypes) { + return new GroupKeyEncoder(groupChannels, elementTypes, new BreakingBytesRefBuilder(blockFactory.breaker(), "group-key-encoder")); + } + @Override protected void assertStatus(Map map, List input, List output) { var emittedRows = output.stream().mapToInt(Page::getPositionCount).sum(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index ccfbf68763562..077b243a25102 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -28,6 +28,7 @@ import org.elasticsearch.compute.operator.AggregationOperator; import org.elasticsearch.compute.operator.AsyncOperator; import org.elasticsearch.compute.operator.DriverStatus; +import org.elasticsearch.compute.operator.GroupedLimitOperator; import org.elasticsearch.compute.operator.HashAggregationOperator; import org.elasticsearch.compute.operator.LimitOperator; import org.elasticsearch.compute.operator.MMROperator; @@ -392,6 +393,7 @@ public List getNamedWriteables() { entries.add(org.elasticsearch.xpack.esql.datasources.AsyncExternalSourceOperator.Status.ENTRY); entries.add(HashAggregationOperator.Status.ENTRY); entries.add(LimitOperator.Status.ENTRY); + entries.add(GroupedLimitOperator.Status.ENTRY); entries.add(LuceneOperator.Status.ENTRY); entries.add(TopNOperatorStatus.ENTRY); entries.add(MvExpandOperator.Status.ENTRY); From f3f05abeb4b42a6e82d99da3417696ae3e690f7c Mon Sep 17 00:00:00 2001 From: ncordon Date: Wed, 4 Mar 2026 18:51:46 +0100 Subject: [PATCH 12/14] Adds more multivalue tests --- .../operator/GroupedLimitOperatorTests.java | 204 ++++++++++++++++++ 1 file changed, 204 insertions(+) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java index e05dc84f6cbc6..1715be6ea2d9a 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java @@ -410,6 +410,210 @@ public void testMultivaluesWithDifferentOrderAreDifferentGroups() { } } + /** + * A single-element multivalue {@code [1]} and a scalar {@code 1} are the + * same group because the block representation is identical (valueCount=1). + */ + public void testSingleElementMultivalueAndScalarAreSameGroup() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.endPositionEntry(); + builder.appendLong(1); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(1)); + } finally { + output.releaseBlocks(); + } + } + } + } + + /** + * A multivalue with duplicates {@code [1,1]} and a single value {@code [1]} + * are different groups because the value count differs. + */ + public void testMultivalueWithDuplicatesAndSingleValueAreDifferentGroups() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(1); + builder.endPositionEntry(); + builder.appendLong(1); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(2)); + } finally { + output.releaseBlocks(); + } + } + } + } + + /** + * Two identical multivalues with duplicates {@code [1,1]} are the same group. + */ + public void testIdenticalMultivaluesWithDuplicatesAreSameGroup() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(1); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(1); + builder.endPositionEntry(); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(1)); + } finally { + output.releaseBlocks(); + } + } + } + } + + /** + * Multivalues of different lengths sharing a prefix are different groups: + * {@code [1,2,3]} vs {@code [1,2]}. + */ + public void testDifferentLengthMultivaluesAreDifferentGroups() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.appendLong(3); + builder.endPositionEntry(); + builder.beginPositionEntry(); + builder.appendLong(1); + builder.appendLong(2); + builder.endPositionEntry(); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(2)); + } finally { + output.releaseBlocks(); + } + } + } + } + + /** + * Multivalue semantics apply within composite keys: {@code ([1,2], 10)}, + * {@code ([1,2], 20)}, and {@code ([2,1], 10)} are three distinct groups. + */ + public void testMultivalueInCompositeKey() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var mvBuilder = blockFactory.newLongBlockBuilder(3)) { + mvBuilder.beginPositionEntry(); + mvBuilder.appendLong(1); + mvBuilder.appendLong(2); + mvBuilder.endPositionEntry(); + mvBuilder.beginPositionEntry(); + mvBuilder.appendLong(1); + mvBuilder.appendLong(2); + mvBuilder.endPositionEntry(); + mvBuilder.beginPositionEntry(); + mvBuilder.appendLong(2); + mvBuilder.appendLong(1); + mvBuilder.endPositionEntry(); + + Page p = new Page( + mvBuilder.build(), + blockFactory.newLongArrayVector(new long[] { 10, 20, 10 }, 3).asBlock() + ); + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(p); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(3)); + } finally { + output.releaseBlocks(); + } + } + } + } + + /** + * Two null positions belong to the same group. + */ + public void testTwoNullsAreSameGroup() { + DriverContext ctx = driverContext(); + BlockFactory blockFactory = ctx.blockFactory(); + try (var builder = blockFactory.newLongBlockBuilder(2)) { + builder.appendNull(); + builder.appendNull(); + + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(builder.build())); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(1)); + } finally { + output.releaseBlocks(); + } + } + } + } + private static GroupKeyEncoder groupKeyEncoder(BlockFactory blockFactory, int[] groupChannels, List elementTypes) { return new GroupKeyEncoder(groupChannels, elementTypes, new BreakingBytesRefBuilder(blockFactory.breaker(), "group-key-encoder")); } From a45aef80fe00eb85f85c8ef00bcb439a06571a79 Mon Sep 17 00:00:00 2001 From: ncordon Date: Wed, 4 Mar 2026 18:58:10 +0100 Subject: [PATCH 13/14] Last nit --- .../operator/GroupedLimitOperatorTests.java | 258 +++--------------- 1 file changed, 36 insertions(+), 222 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java index 1715be6ea2d9a..103818538547e 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java @@ -283,96 +283,21 @@ public void testMultipleGroupChannels() { * A multivalue {@code [1,2]} and a single value {@code 1} are different groups. */ public void testMultivalueAndSingleValueAreDifferentGroups() { - DriverContext ctx = driverContext(); - BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(2); - builder.endPositionEntry(); - builder.appendLong(1); - - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(2)); - } finally { - output.releaseBlocks(); - } - } - } + assertDistinctGroups(2, new long[] { 1, 2 }, new long[] { 1 }); } /** * A null position and a multivalue {@code [1,2]} are different groups. */ public void testNullAndMultivalueAreDifferentGroups() { - DriverContext ctx = driverContext(); - BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.appendNull(); - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(2); - builder.endPositionEntry(); - - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(2)); - } finally { - output.releaseBlocks(); - } - } - } + assertDistinctGroups(2, null, new long[] { 1, 2 }); } /** * Two identical multivalues {@code [1,2]} belong to the same group. */ public void testIdenticalMultivaluesAreSameGroup() { - DriverContext ctx = driverContext(); - BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(2); - builder.endPositionEntry(); - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(2); - builder.endPositionEntry(); - - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(1)); - } finally { - output.releaseBlocks(); - } - } - } + assertDistinctGroups(1, new long[] { 1, 2 }, new long[] { 1, 2 }); } /** @@ -380,34 +305,7 @@ public void testIdenticalMultivaluesAreSameGroup() { * multivalues use order-sensitive list semantics. */ public void testMultivaluesWithDifferentOrderAreDifferentGroups() { - DriverContext ctx = driverContext(); - BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(2); - builder.endPositionEntry(); - builder.beginPositionEntry(); - builder.appendLong(2); - builder.appendLong(1); - builder.endPositionEntry(); - - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(2)); - } finally { - output.releaseBlocks(); - } - } - } + assertDistinctGroups(2, new long[] { 1, 2 }, new long[] { 2, 1 }); } /** @@ -415,96 +313,22 @@ public void testMultivaluesWithDifferentOrderAreDifferentGroups() { * same group because the block representation is identical (valueCount=1). */ public void testSingleElementMultivalueAndScalarAreSameGroup() { - DriverContext ctx = driverContext(); - BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.beginPositionEntry(); - builder.appendLong(1); - builder.endPositionEntry(); - builder.appendLong(1); - - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(1)); - } finally { - output.releaseBlocks(); - } - } - } + assertDistinctGroups(1, new long[] { 1 }, new long[] { 1 }); } /** - * A multivalue with duplicates {@code [1,1]} and a single value {@code [1]} + * A multivalue with duplicates {@code [1,1]} and a single value {@code 1} * are different groups because the value count differs. */ public void testMultivalueWithDuplicatesAndSingleValueAreDifferentGroups() { - DriverContext ctx = driverContext(); - BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(1); - builder.endPositionEntry(); - builder.appendLong(1); - - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(2)); - } finally { - output.releaseBlocks(); - } - } - } + assertDistinctGroups(2, new long[] { 1, 1 }, new long[] { 1 }); } /** * Two identical multivalues with duplicates {@code [1,1]} are the same group. */ public void testIdenticalMultivaluesWithDuplicatesAreSameGroup() { - DriverContext ctx = driverContext(); - BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(1); - builder.endPositionEntry(); - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(1); - builder.endPositionEntry(); - - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(1)); - } finally { - output.releaseBlocks(); - } - } - } + assertDistinctGroups(1, new long[] { 1, 1 }, new long[] { 1, 1 }); } /** @@ -512,35 +336,7 @@ public void testIdenticalMultivaluesWithDuplicatesAreSameGroup() { * {@code [1,2,3]} vs {@code [1,2]}. */ public void testDifferentLengthMultivaluesAreDifferentGroups() { - DriverContext ctx = driverContext(); - BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(2); - builder.appendLong(3); - builder.endPositionEntry(); - builder.beginPositionEntry(); - builder.appendLong(1); - builder.appendLong(2); - builder.endPositionEntry(); - - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(2)); - } finally { - output.releaseBlocks(); - } - } - } + assertDistinctGroups(2, new long[] { 1, 2, 3 }, new long[] { 1, 2 }); } /** @@ -564,10 +360,7 @@ public void testMultivalueInCompositeKey() { mvBuilder.appendLong(1); mvBuilder.endPositionEntry(); - Page p = new Page( - mvBuilder.build(), - blockFactory.newLongArrayVector(new long[] { 10, 20, 10 }, 3).asBlock() - ); + Page p = new Page(mvBuilder.build(), blockFactory.newLongArrayVector(new long[] { 10, 20, 10 }, 3).asBlock()); try ( GroupedLimitOperator op = new GroupedLimitOperator( 1, @@ -590,12 +383,33 @@ public void testMultivalueInCompositeKey() { * Two null positions belong to the same group. */ public void testTwoNullsAreSameGroup() { + assertDistinctGroups(1, null, null); + } + + /** + * Feeds the given values through a {@code limit=1} single-channel operator + * and asserts the number of output positions (i.e. distinct groups). + * Each {@code long[]} entry represents one position: {@code null} for a null + * position, a single-element array for a scalar, and a multi-element array + * for a multivalue. + */ + private void assertDistinctGroups(int expectedGroups, long[]... values) { DriverContext ctx = driverContext(); BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(2)) { - builder.appendNull(); - builder.appendNull(); - + try (var builder = blockFactory.newLongBlockBuilder(values.length)) { + for (long[] value : values) { + if (value == null) { + builder.appendNull(); + } else if (value.length == 1) { + builder.appendLong(value[0]); + } else { + builder.beginPositionEntry(); + for (long val : value) { + builder.appendLong(val); + } + builder.endPositionEntry(); + } + } try ( GroupedLimitOperator op = new GroupedLimitOperator( 1, @@ -606,7 +420,7 @@ public void testTwoNullsAreSameGroup() { op.addInput(new Page(builder.build())); Page output = op.getOutput(); try { - assertThat(output.getPositionCount(), equalTo(1)); + assertThat(output.getPositionCount(), equalTo(expectedGroups)); } finally { output.releaseBlocks(); } From 88664e7585d5d6ae3d7dc07656fecfc587170b6d Mon Sep 17 00:00:00 2001 From: ncordon Date: Wed, 4 Mar 2026 20:07:13 +0100 Subject: [PATCH 14/14] Addresses last bits of feedback, hopefully --- .../operator/GroupedLimitOperator.java | 6 +- .../operator/GroupedLimitOperatorTests.java | 142 ++++++++---------- 2 files changed, 63 insertions(+), 85 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java index 59a710d71f2bc..58647a98af1e8 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/GroupedLimitOperator.java @@ -71,8 +71,8 @@ public String describe() { *
  • a counts array to keep track of the number of times we have seen each {@code Ordinal(key)}
  • * */ - private final BytesRefHashTable seenKeys; - private final BigArrays bigArrays; + private BytesRefHashTable seenKeys; + private BigArrays bigArrays; private IntArray counts; private int pagesProcessed; @@ -93,7 +93,7 @@ public GroupedLimitOperator(int limitPerGroup, GroupKeyEncoder keyEncoder, Block success = true; } finally { if (success == false) { - keyEncoder.close(); + Releasables.closeExpectNoException(keyEncoder, seenKeys); } } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java index 103818538547e..eefea9c5089d6 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/GroupedLimitOperatorTests.java @@ -11,10 +11,12 @@ import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.test.BlockTestUtils; import org.elasticsearch.compute.test.OperatorTestCase; import org.elasticsearch.compute.test.operator.blocksource.SequenceLongBlockSourceOperator; import org.hamcrest.Matcher; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.LongStream; @@ -66,7 +68,7 @@ public void testStatus() { assertThat(status.rowsReceived(), equalTo(0L)); assertThat(status.rowsEmitted(), equalTo(0L)); - Page p = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 1 }, 3).asBlock()); + Page p = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 2L, 1L))); op.addInput(p); Page output = op.getOutput(); try { @@ -87,7 +89,7 @@ public void testNeedInput() { BlockFactory blockFactory = ctx.blockFactory(); try (GroupedLimitOperator op = simple(SimpleOptions.DEFAULT).get(ctx)) { assertTrue(op.needsInput()); - Page p = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 3 }, 3).asBlock()); + Page p = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 2L, 3L))); op.addInput(p); assertFalse(op.needsInput()); op.getOutput().releaseBlocks(); @@ -111,7 +113,7 @@ public void testLimitPerGroupSinglePage() { blockFactory ) ) { - Page p = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 1, 1, 2 }, 5).asBlock()); + Page p = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 2L, 1L, 1L, 2L))); op.addInput(p); Page output = op.getOutput(); try { @@ -141,7 +143,7 @@ public void testLimitPerGroupAcrossPages() { blockFactory ) ) { - Page p1 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 1, 2 }, 3).asBlock()); + Page p1 = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 1L, 2L))); op.addInput(p1); Page out1 = op.getOutput(); try { @@ -150,7 +152,7 @@ public void testLimitPerGroupAcrossPages() { out1.releaseBlocks(); } - Page p2 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 2 }, 3).asBlock()); + Page p2 = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 2L, 2L))); op.addInput(p2); Page out2 = op.getOutput(); try { @@ -177,7 +179,7 @@ public void testEntirePageFiltered() { blockFactory ) ) { - Page p1 = new Page(blockFactory.newLongArrayVector(new long[] { 1 }, 1).asBlock()); + Page p1 = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L))); op.addInput(p1); Page out1 = op.getOutput(); try { @@ -186,7 +188,7 @@ public void testEntirePageFiltered() { out1.releaseBlocks(); } - Page p2 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 1, 1 }, 3).asBlock()); + Page p2 = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 1L, 1L))); op.addInput(p2); assertThat(op.getOutput(), nullValue()); } @@ -202,7 +204,7 @@ public void testPagePassesThroughWhenAllAccepted() { blockFactory ) ) { - Page p = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 3 }, 3).asBlock()); + Page p = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 2L, 3L))); op.addInput(p); Page output = op.getOutput(); try { @@ -227,11 +229,11 @@ public void testLimitPerGroupZero() { blockFactory ) ) { - Page p1 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 2, 3 }, 3).asBlock()); + Page p1 = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 2L, 3L))); op.addInput(p1); assertThat(op.getOutput(), nullValue()); - Page p2 = new Page(blockFactory.newLongArrayVector(new long[] { 1, 1, 2 }, 3).asBlock()); + Page p2 = new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 1L, 2L))); op.addInput(p2); assertThat(op.getOutput(), nullValue()); @@ -258,8 +260,8 @@ public void testMultipleGroupChannels() { ) ) { Page p = new Page( - blockFactory.newLongArrayVector(new long[] { 1, 1, 1, 2 }, 4).asBlock(), - blockFactory.newLongArrayVector(new long[] { 10, 20, 10, 10 }, 4).asBlock() + BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(1L, 1L, 1L, 2L)), + BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(10L, 20L, 10L, 10L)) ); op.addInput(p); Page output = op.getOutput(); @@ -283,21 +285,21 @@ public void testMultipleGroupChannels() { * A multivalue {@code [1,2]} and a single value {@code 1} are different groups. */ public void testMultivalueAndSingleValueAreDifferentGroups() { - assertDistinctGroups(2, new long[] { 1, 2 }, new long[] { 1 }); + assertDistinctGroups(2, List.of(1L, 2L), 1L); } /** * A null position and a multivalue {@code [1,2]} are different groups. */ public void testNullAndMultivalueAreDifferentGroups() { - assertDistinctGroups(2, null, new long[] { 1, 2 }); + assertDistinctGroups(2, null, List.of(1L, 2L)); } /** * Two identical multivalues {@code [1,2]} belong to the same group. */ public void testIdenticalMultivaluesAreSameGroup() { - assertDistinctGroups(1, new long[] { 1, 2 }, new long[] { 1, 2 }); + assertDistinctGroups(1, List.of(1L, 2L), List.of(1L, 2L)); } /** @@ -305,7 +307,7 @@ public void testIdenticalMultivaluesAreSameGroup() { * multivalues use order-sensitive list semantics. */ public void testMultivaluesWithDifferentOrderAreDifferentGroups() { - assertDistinctGroups(2, new long[] { 1, 2 }, new long[] { 2, 1 }); + assertDistinctGroups(2, List.of(1L, 2L), List.of(2L, 1L)); } /** @@ -313,7 +315,7 @@ public void testMultivaluesWithDifferentOrderAreDifferentGroups() { * same group because the block representation is identical (valueCount=1). */ public void testSingleElementMultivalueAndScalarAreSameGroup() { - assertDistinctGroups(1, new long[] { 1 }, new long[] { 1 }); + assertDistinctGroups(1, 1L, 1L); } /** @@ -321,14 +323,14 @@ public void testSingleElementMultivalueAndScalarAreSameGroup() { * are different groups because the value count differs. */ public void testMultivalueWithDuplicatesAndSingleValueAreDifferentGroups() { - assertDistinctGroups(2, new long[] { 1, 1 }, new long[] { 1 }); + assertDistinctGroups(2, List.of(1L, 1L), 1L); } /** * Two identical multivalues with duplicates {@code [1,1]} are the same group. */ public void testIdenticalMultivaluesWithDuplicatesAreSameGroup() { - assertDistinctGroups(1, new long[] { 1, 1 }, new long[] { 1, 1 }); + assertDistinctGroups(1, List.of(1L, 1L), List.of(1L, 1L)); } /** @@ -336,45 +338,37 @@ public void testIdenticalMultivaluesWithDuplicatesAreSameGroup() { * {@code [1,2,3]} vs {@code [1,2]}. */ public void testDifferentLengthMultivaluesAreDifferentGroups() { - assertDistinctGroups(2, new long[] { 1, 2, 3 }, new long[] { 1, 2 }); + assertDistinctGroups(2, List.of(1L, 2L, 3L), List.of(1L, 2L)); } /** * Multivalue semantics apply within composite keys: {@code ([1,2], 10)}, - * {@code ([1,2], 20)}, and {@code ([2,1], 10)} are three distinct groups. + * {@code ([1,2], 20)}, {@code ([2,1], 10)} and {@code ([2,1], 10)} are three distinct groups. */ public void testMultivalueInCompositeKey() { DriverContext ctx = driverContext(); BlockFactory blockFactory = ctx.blockFactory(); - try (var mvBuilder = blockFactory.newLongBlockBuilder(3)) { - mvBuilder.beginPositionEntry(); - mvBuilder.appendLong(1); - mvBuilder.appendLong(2); - mvBuilder.endPositionEntry(); - mvBuilder.beginPositionEntry(); - mvBuilder.appendLong(1); - mvBuilder.appendLong(2); - mvBuilder.endPositionEntry(); - mvBuilder.beginPositionEntry(); - mvBuilder.appendLong(2); - mvBuilder.appendLong(1); - mvBuilder.endPositionEntry(); - - Page p = new Page(mvBuilder.build(), blockFactory.newLongArrayVector(new long[] { 10, 20, 10 }, 3).asBlock()); - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(p); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(3)); - } finally { - output.releaseBlocks(); - } + Page p = new Page( + BlockTestUtils.asBlock( + blockFactory, + ElementType.LONG, + List.of(List.of(1L, 2L), List.of(1L, 2L), List.of(2L, 1L), List.of(2L, 1L)) + ), + BlockTestUtils.asBlock(blockFactory, ElementType.LONG, List.of(10L, 20L, 10L, 10L)) + ); + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0, 1 }, List.of(ElementType.LONG, ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(p); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(3)); + } finally { + output.releaseBlocks(); } } } @@ -389,41 +383,25 @@ public void testTwoNullsAreSameGroup() { /** * Feeds the given values through a {@code limit=1} single-channel operator * and asserts the number of output positions (i.e. distinct groups). - * Each {@code long[]} entry represents one position: {@code null} for a null - * position, a single-element array for a scalar, and a multi-element array - * for a multivalue. + * Each entry can be: {@code null} for a null position, a {@code Long} scalar, + * or a {@code List} for a multivalue. */ - private void assertDistinctGroups(int expectedGroups, long[]... values) { + private void assertDistinctGroups(int expectedGroups, Object... values) { DriverContext ctx = driverContext(); BlockFactory blockFactory = ctx.blockFactory(); - try (var builder = blockFactory.newLongBlockBuilder(values.length)) { - for (long[] value : values) { - if (value == null) { - builder.appendNull(); - } else if (value.length == 1) { - builder.appendLong(value[0]); - } else { - builder.beginPositionEntry(); - for (long val : value) { - builder.appendLong(val); - } - builder.endPositionEntry(); - } - } - try ( - GroupedLimitOperator op = new GroupedLimitOperator( - 1, - groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), - blockFactory - ) - ) { - op.addInput(new Page(builder.build())); - Page output = op.getOutput(); - try { - assertThat(output.getPositionCount(), equalTo(expectedGroups)); - } finally { - output.releaseBlocks(); - } + try ( + GroupedLimitOperator op = new GroupedLimitOperator( + 1, + groupKeyEncoder(blockFactory, new int[] { 0 }, List.of(ElementType.LONG)), + blockFactory + ) + ) { + op.addInput(new Page(BlockTestUtils.asBlock(blockFactory, ElementType.LONG, Arrays.asList(values)))); + Page output = op.getOutput(); + try { + assertThat(output.getPositionCount(), equalTo(expectedGroups)); + } finally { + output.releaseBlocks(); } } }