-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Push DictionaryBlock through remote partitioned exchange #14937
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,8 +17,13 @@ | |
| import io.trino.spi.block.DictionaryBlock; | ||
| import io.trino.spi.block.RunLengthEncodedBlock; | ||
| import it.unimi.dsi.fastutil.ints.IntArrayList; | ||
| import it.unimi.dsi.fastutil.ints.IntArrays; | ||
|
|
||
| import static com.google.common.base.Preconditions.checkArgument; | ||
| import static io.airlift.slice.SizeOf.instanceSize; | ||
| import static io.trino.operator.output.PositionsAppenderUtil.calculateBlockResetSize; | ||
| import static io.trino.operator.output.PositionsAppenderUtil.calculateNewArraySize; | ||
| import static java.lang.Math.max; | ||
| import static java.util.Objects.requireNonNull; | ||
|
|
||
| /** | ||
|
|
@@ -30,10 +35,12 @@ public class UnnestingPositionsAppender | |
| private static final int INSTANCE_SIZE = instanceSize(UnnestingPositionsAppender.class); | ||
|
|
||
| private final PositionsAppender delegate; | ||
| private DictionaryBlockBuilder dictionaryBlockBuilder; | ||
|
|
||
| public UnnestingPositionsAppender(PositionsAppender delegate) | ||
| { | ||
| this.delegate = requireNonNull(delegate, "delegate is null"); | ||
| this.dictionaryBlockBuilder = new DictionaryBlockBuilder(); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -43,12 +50,14 @@ public void append(IntArrayList positions, Block source) | |
| return; | ||
| } | ||
| if (source instanceof RunLengthEncodedBlock) { | ||
| dictionaryBlockBuilder.flushDictionary(delegate); | ||
| delegate.appendRle(((RunLengthEncodedBlock) source).getValue(), positions.size()); | ||
| } | ||
|
lukasz-stec marked this conversation as resolved.
Outdated
|
||
| else if (source instanceof DictionaryBlock) { | ||
| appendDictionary(positions, (DictionaryBlock) source); | ||
| } | ||
| else { | ||
| dictionaryBlockBuilder.flushDictionary(delegate); | ||
| delegate.append(positions, source); | ||
| } | ||
| } | ||
|
|
@@ -59,12 +68,15 @@ public void appendRle(Block block, int rlePositionCount) | |
| if (rlePositionCount == 0) { | ||
| return; | ||
| } | ||
|
raunaqmorarka marked this conversation as resolved.
Outdated
|
||
| dictionaryBlockBuilder.flushDictionary(delegate); | ||
| delegate.appendRle(block, rlePositionCount); | ||
| } | ||
|
|
||
| @Override | ||
| public void append(int position, Block source) | ||
| { | ||
| dictionaryBlockBuilder.flushDictionary(delegate); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is dictionary flushed in every case here but in the other
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we flush in every case other than we have the same dictionary ( |
||
|
|
||
| if (source instanceof RunLengthEncodedBlock runLengthEncodedBlock) { | ||
| delegate.append(0, runLengthEncodedBlock.getValue()); | ||
| } | ||
|
|
@@ -79,13 +91,21 @@ else if (source instanceof DictionaryBlock dictionaryBlock) { | |
| @Override | ||
| public Block build() | ||
| { | ||
| return delegate.build(); | ||
| Block result; | ||
| if (dictionaryBlockBuilder.isEmpty()) { | ||
| result = delegate.build(); | ||
| } | ||
| else { | ||
| result = dictionaryBlockBuilder.build(); | ||
| } | ||
| dictionaryBlockBuilder = dictionaryBlockBuilder.newBuilderLike(); | ||
| return result; | ||
| } | ||
|
|
||
| @Override | ||
| public long getRetainedSizeInBytes() | ||
| { | ||
| return INSTANCE_SIZE + delegate.getRetainedSizeInBytes(); | ||
| return INSTANCE_SIZE + delegate.getRetainedSizeInBytes() + dictionaryBlockBuilder.getRetainedSizeInBytes(); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -96,15 +116,114 @@ public long getSizeInBytes() | |
|
|
||
| private void appendDictionary(IntArrayList positions, DictionaryBlock source) | ||
| { | ||
| delegate.append(mapPositions(positions, source), source.getDictionary()); | ||
| Block dictionary = source.getDictionary(); | ||
| IntArrayList dictionaryPositions = getDictionaryPositions(positions, source); | ||
| if (dictionaryBlockBuilder.canAppend(dictionary)) { | ||
| dictionaryBlockBuilder.append(dictionaryPositions, dictionary); | ||
| } | ||
| else { | ||
| dictionaryBlockBuilder.flushDictionary(delegate); | ||
| delegate.append(dictionaryPositions, dictionary); | ||
| } | ||
| } | ||
|
|
||
| private IntArrayList mapPositions(IntArrayList positions, DictionaryBlock block) | ||
| private IntArrayList getDictionaryPositions(IntArrayList positions, DictionaryBlock block) | ||
| { | ||
| int[] positionArray = new int[positions.size()]; | ||
| for (int i = 0; i < positions.size(); i++) { | ||
| positionArray[i] = block.getId(positions.getInt(i)); | ||
| } | ||
| return IntArrayList.wrap(positionArray); | ||
| } | ||
|
|
||
| private static class DictionaryBlockBuilder | ||
| { | ||
| private static final int INSTANCE_SIZE = instanceSize(DictionaryBlockBuilder.class); | ||
| private final int initialEntryCount; | ||
| private Block dictionary; | ||
| private int[] dictionaryIds; | ||
| private int positionCount; | ||
| private boolean closed; | ||
|
|
||
| public DictionaryBlockBuilder() | ||
| { | ||
| this(1024); | ||
|
raunaqmorarka marked this conversation as resolved.
|
||
| } | ||
|
|
||
| public DictionaryBlockBuilder(int initialEntryCount) | ||
| { | ||
| this.initialEntryCount = initialEntryCount; | ||
| this.dictionaryIds = new int[0]; | ||
| } | ||
|
|
||
| public boolean isEmpty() | ||
| { | ||
| return positionCount == 0; | ||
| } | ||
|
|
||
| public Block build() | ||
| { | ||
| return DictionaryBlock.create(positionCount, dictionary, dictionaryIds); | ||
| } | ||
|
|
||
| public long getRetainedSizeInBytes() | ||
| { | ||
| return INSTANCE_SIZE | ||
| + (long) dictionaryIds.length * Integer.BYTES | ||
| + (dictionary != null ? dictionary.getRetainedSizeInBytes() : 0); | ||
| } | ||
|
|
||
| public boolean canAppend(Block dictionary) | ||
| { | ||
| return !closed && (dictionary == this.dictionary || this.dictionary == null); | ||
|
lukasz-stec marked this conversation as resolved.
Outdated
|
||
| } | ||
|
|
||
| public void append(IntArrayList mappedPositions, Block dictionary) | ||
| { | ||
| checkArgument(canAppend(dictionary)); | ||
| this.dictionary = dictionary; | ||
| ensureCapacity(positionCount + mappedPositions.size()); | ||
| System.arraycopy(mappedPositions.elements(), 0, dictionaryIds, positionCount, mappedPositions.size()); | ||
| positionCount += mappedPositions.size(); | ||
| } | ||
|
|
||
| public void flushDictionary(PositionsAppender delegate) | ||
| { | ||
| if (closed) { | ||
|
raunaqmorarka marked this conversation as resolved.
Outdated
|
||
| return; | ||
| } | ||
| if (positionCount > 0) { | ||
| requireNonNull(dictionary, () -> "dictionary is null but we have pending dictionaryIds " + positionCount); | ||
| delegate.append(IntArrayList.wrap(dictionaryIds, positionCount), dictionary); | ||
| } | ||
|
|
||
| closed = true; | ||
| dictionaryIds = new int[0]; | ||
| positionCount = 0; | ||
| dictionary = null; | ||
| } | ||
|
|
||
| public DictionaryBlockBuilder newBuilderLike() | ||
| { | ||
| return new DictionaryBlockBuilder(max(calculateBlockResetSize(positionCount), initialEntryCount)); | ||
| } | ||
|
|
||
| private void ensureCapacity(int capacity) | ||
| { | ||
| if (dictionaryIds.length >= capacity) { | ||
| return; | ||
| } | ||
|
|
||
| int newSize; | ||
| if (dictionaryIds.length > 0) { | ||
| newSize = calculateNewArraySize(dictionaryIds.length); | ||
| } | ||
| else { | ||
| newSize = initialEntryCount; | ||
| } | ||
| newSize = Math.max(newSize, capacity); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any possibility that
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well yes, i.e.,
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not just do ofc this will work different that now. If capacity will 100, it will create an array of size 150.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we do not want to go over |
||
|
|
||
| dictionaryIds = IntArrays.ensureCapacity(dictionaryIds, newSize, positionCount); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Keep state top-level as in
io.trino.operator.output.RleAwarePositionsAppenderThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this class has now two responsibilities (unnesting and building a dictionary) it's more readable to separate them. It also makes it easier to reset the state of the appender.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This class doesn't really unnest much now.
RleAwayPositionsAppendercould do:itself really at this point, so
UnnestingPositionsAppenderwould be all about dictionariesThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RleAwayPositionsAppenderis not always there. Unnesting part is actually to make sure only flat blocks are passed down to the flat appenders.Maybe we should merge
UnnestingPositionsAppenderandRleAwayPositionsAppenderintoBlockTypeAwarePositionsAppender? although I fear it would make the code messier.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why wouldn't it be always there?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it's not needed if the type is not comparable
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it's worth extra complexity since there are not many types like that.
However. You could then have minimal
UnnestingPositionsAppenderwithout rle or dict builder support.I don't mixing of current
UnnestingPositionsAppenderwith dictionary awareness is needed