-
Notifications
You must be signed in to change notification settings - Fork 25.8k
[ES|QL] Convert PackedValuesBlockHash.bytes to BreakingBytesRefBuilder for better memory tracking
#140171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
fang-xing-esql
merged 12 commits into
elastic:main
from
fang-xing-esql:PackedValuesBlockHash-BreakingBytesRefBuilder
Jan 9, 2026
Merged
[ES|QL] Convert PackedValuesBlockHash.bytes to BreakingBytesRefBuilder for better memory tracking
#140171
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
1ac55ed
converted PackedValuesBlockHash.bytes to BreakingBytesRefBuilder
fang-xing-esql 7e718a7
Update docs/changelog/140171.yaml
fang-xing-esql ce40be4
Merge branch 'main' into PackedValuesBlockHash-BreakingBytesRefBuilder
fang-xing-esql 8412dc3
Merge branch 'main' into PackedValuesBlockHash-BreakingBytesRefBuilder
fang-xing-esql a376045
use dedicated circuit breaker for PackedValuesBlockHash.bytes in test
fang-xing-esql 6dd8996
Merge branch 'main' into PackedValuesBlockHash-BreakingBytesRefBuilder
fang-xing-esql dc15e94
Merge branch 'main' into PackedValuesBlockHash-BreakingBytesRefBuilder
fang-xing-esql 3575999
refactor PackedValuesBlockHash's constructor
fang-xing-esql 541ee01
Merge branch 'main' into PackedValuesBlockHash-BreakingBytesRefBuilder
fang-xing-esql ec03b48
Merge branch 'main' into PackedValuesBlockHash-BreakingBytesRefBuilder
fang-xing-esql 40a15d5
Merge branch 'main' into PackedValuesBlockHash-BreakingBytesRefBuilder
fang-xing-esql e602617
Merge branch 'main' into PackedValuesBlockHash-BreakingBytesRefBuilder
fang-xing-esql File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| pr: 140171 | ||
| summary: Converted `PackedValuesBlockHash.bytes` to `BreakingBytesRefBuilder` for | ||
| better memory tracking | ||
| area: ES|QL | ||
| type: enhancement | ||
| issues: [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,7 @@ | |
| package org.elasticsearch.compute.aggregation.blockhash; | ||
|
|
||
| import org.apache.lucene.util.BytesRef; | ||
| import org.apache.lucene.util.BytesRefBuilder; | ||
| import org.elasticsearch.common.breaker.CircuitBreaker; | ||
| import org.elasticsearch.common.unit.ByteSizeValue; | ||
| import org.elasticsearch.common.util.BigArrays; | ||
| import org.elasticsearch.common.util.BitArray; | ||
|
|
@@ -21,6 +21,7 @@ | |
| import org.elasticsearch.compute.data.IntBlock; | ||
| import org.elasticsearch.compute.data.IntVector; | ||
| import org.elasticsearch.compute.data.Page; | ||
| import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; | ||
| import org.elasticsearch.compute.operator.mvdedupe.BatchEncoder; | ||
| import org.elasticsearch.compute.operator.mvdedupe.MultivalueDedupe; | ||
| import org.elasticsearch.core.Releasable; | ||
|
|
@@ -63,16 +64,33 @@ final class PackedValuesBlockHash extends BlockHash { | |
| private final int emitBatchSize; | ||
| private final BytesRefHashTable bytesRefHash; | ||
| private final int nullTrackingBytes; | ||
| private final BytesRefBuilder bytes = new BytesRefBuilder(); | ||
| private final BreakingBytesRefBuilder bytes; | ||
| private final List<GroupSpec> specs; | ||
|
|
||
| PackedValuesBlockHash(List<GroupSpec> specs, BlockFactory blockFactory, int emitBatchSize) { | ||
| this(specs, blockFactory, blockFactory.breaker(), emitBatchSize); | ||
| } | ||
|
|
||
| /* | ||
| * This constructor is also used by {@code PackedValuesBlockHashCircuitBreakerTests} to provide different circuit breakers | ||
| * to bytesRefHash and bytes. Production code should use the constructor above, which provides the same breaker for both. | ||
| */ | ||
| PackedValuesBlockHash(List<GroupSpec> specs, BlockFactory blockFactory, CircuitBreaker circuitBreaker, int emitBatchSize) { | ||
| super(blockFactory); | ||
| this.specs = specs; | ||
| this.emitBatchSize = emitBatchSize; | ||
| this.bytesRefHash = HashImplFactory.newBytesRefHash(blockFactory); | ||
| this.nullTrackingBytes = (specs.size() + 7) / 8; | ||
| bytes.grow(nullTrackingBytes); | ||
| boolean success = false; | ||
| try { | ||
| this.bytesRefHash = HashImplFactory.newBytesRefHash(blockFactory); | ||
| this.bytes = new BreakingBytesRefBuilder(circuitBreaker, "PackedValuesBlockHash", this.nullTrackingBytes); | ||
| success = true; | ||
| } finally { | ||
| // close bytesRefHash and bytes to prevent memory leaks if initialization fails | ||
| if (success == false) { | ||
| close(); | ||
| } | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is valid and fine, but we usually write this as: Mostly out of paranoia around eating the stack trace for |
||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -147,14 +165,14 @@ void add() { | |
|
|
||
| private void addSingleEntry() { | ||
| fillBytesSv(groups); | ||
| appendOrdSv(position, Math.toIntExact(hashOrdToGroup(bytesRefHash.add(bytes.get())))); | ||
| appendOrdSv(position, Math.toIntExact(hashOrdToGroup(bytesRefHash.add(bytes.bytesRefView())))); | ||
| } | ||
|
|
||
| private void addMultipleEntries() { | ||
| int g = 0; | ||
| do { | ||
| fillBytesMv(groups, g); | ||
| appendOrdInMv(position, Math.toIntExact(hashOrdToGroup(bytesRefHash.add(bytes.get())))); | ||
| appendOrdInMv(position, Math.toIntExact(hashOrdToGroup(bytesRefHash.add(bytes.bytesRefView())))); | ||
| g = rewindKeys(groups); | ||
| } while (g >= 0); | ||
| finishMv(); | ||
|
|
@@ -216,7 +234,7 @@ public IntBlock next() { | |
|
|
||
| private void lookupSingleEntry(IntBlock.Builder ords) { | ||
| fillBytesSv(groups); | ||
| long found = bytesRefHash.find(bytes.get()); | ||
| long found = bytesRefHash.find(bytes.bytesRefView()); | ||
| if (found < 0) { | ||
| ords.appendNull(); | ||
| } else { | ||
|
|
@@ -233,7 +251,7 @@ private void lookupMultipleEntries(IntBlock.Builder ords) { | |
| fillBytesMv(groups, g); | ||
|
|
||
| // emit ords | ||
| long found = bytesRefHash.find(bytes.get()); | ||
| long found = bytesRefHash.find(bytes.bytesRefView()); | ||
| if (found >= 0) { | ||
| if (firstFound < 0) { | ||
| firstFound = found; | ||
|
|
@@ -413,7 +431,7 @@ public BitArray seenGroupIds(BigArrays bigArrays) { | |
|
|
||
| @Override | ||
| public void close() { | ||
| bytesRefHash.close(); | ||
| Releasables.close(bytesRefHash, bytes); | ||
| } | ||
|
|
||
| @Override | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
80 changes: 80 additions & 0 deletions
80
...elasticsearch/compute/aggregation/blockhash/PackedValuesBlockHashCircuitBreakerTests.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|
|
||
| package org.elasticsearch.compute.aggregation.blockhash; | ||
|
|
||
| import org.apache.lucene.util.BytesRef; | ||
| import org.elasticsearch.common.breaker.CircuitBreaker; | ||
| import org.elasticsearch.common.breaker.CircuitBreakingException; | ||
| import org.elasticsearch.common.breaker.NoopCircuitBreaker; | ||
| import org.elasticsearch.common.unit.ByteSizeValue; | ||
| import org.elasticsearch.common.util.BigArrays; | ||
| import org.elasticsearch.common.util.MockBigArrays; | ||
| import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; | ||
| import org.elasticsearch.compute.data.Block; | ||
| import org.elasticsearch.compute.data.BlockFactory; | ||
| import org.elasticsearch.compute.data.BytesRefBlock; | ||
| import org.elasticsearch.compute.data.ElementType; | ||
| import org.elasticsearch.compute.data.IntArrayBlock; | ||
| import org.elasticsearch.compute.data.IntBigArrayBlock; | ||
| import org.elasticsearch.compute.data.IntVector; | ||
| import org.elasticsearch.compute.data.Page; | ||
|
|
||
| import java.util.ArrayList; | ||
| import java.util.List; | ||
|
|
||
| import static org.elasticsearch.common.util.MockBigArrays.ERROR_MESSAGE; | ||
| import static org.hamcrest.Matchers.equalTo; | ||
|
|
||
| public class PackedValuesBlockHashCircuitBreakerTests extends BlockHashTestCase { | ||
|
|
||
| /** | ||
| * Sets the breaker limit low enough and tests that adding many (1000) BYTES_REF group keys into {@code bytes}, the | ||
| * {@code BreakingBytesRefBuilder} that is reused for each grouping set, triggers a {@code CircuitBreakingException} (CBE). | ||
| * The CBE happens when adding around the 11th group to {@code bytes}. | ||
| */ | ||
| public void testCircuitBreakerWithManyGroups() { | ||
| CircuitBreaker bytesBreaker = new MockBigArrays.LimitedBreaker(CircuitBreaker.REQUEST, ByteSizeValue.ofKb(1)); | ||
| BlockFactory blockFactory = BlockFactory.getInstance(new NoopCircuitBreaker("test"), BigArrays.NON_RECYCLING_INSTANCE); | ||
|
|
||
| // 1000 group keys of BYTES_REF | ||
| List<BlockHash.GroupSpec> groupSpecs = new ArrayList<>(); | ||
| for (int i = 0; i < 1000; i++) { | ||
| groupSpecs.add(new BlockHash.GroupSpec(i, ElementType.BYTES_REF)); | ||
| } | ||
|
|
||
| try ( | ||
| PackedValuesBlockHash blockHash = new PackedValuesBlockHash(groupSpecs, blockFactory, bytesBreaker, 32); | ||
| BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(1) | ||
| ) { | ||
| builder.appendBytesRef(new BytesRef("test")); | ||
| Block block = builder.build(); | ||
| Block[] blocks = new Block[1000]; | ||
| for (int i = 0; i < 1000; i++) { | ||
| blocks[i] = block; | ||
| } | ||
| Page page = new Page(blocks); | ||
|
|
||
| CircuitBreakingException e = expectThrows( | ||
| CircuitBreakingException.class, | ||
| () -> blockHash.add(page, new GroupingAggregatorFunction.AddInput() { | ||
| @Override | ||
| public void add(int positionOffset, IntArrayBlock groupIds) {} | ||
|
|
||
| @Override | ||
| public void add(int positionOffset, IntBigArrayBlock groupIds) {} | ||
|
|
||
| @Override | ||
| public void add(int positionOffset, IntVector groupIds) {} | ||
|
|
||
| @Override | ||
| public void close() {} | ||
| }) | ||
| ); | ||
| assertThat(e.getMessage(), equalTo(ERROR_MESSAGE)); | ||
| } | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The primary ctor can call this one like
this(specs, blockFactory, blockFactory.breaker(), emitBatchSize). There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point! More refactor is needed. :)