-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Implement flushing for partial TopNOperator #10166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| package io.trino.operator; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
| import io.airlift.units.DataSize; | ||
| import io.trino.memory.context.MemoryTrackingContext; | ||
| import io.trino.operator.BasicWorkProcessorOperatorAdapter.BasicAdapterWorkProcessorOperatorFactory; | ||
| import io.trino.operator.WorkProcessor.TransformationState; | ||
|
|
@@ -24,6 +25,7 @@ | |
| import io.trino.sql.planner.plan.PlanNodeId; | ||
|
|
||
| import java.util.List; | ||
| import java.util.Optional; | ||
|
|
||
| import static com.google.common.base.Preconditions.checkState; | ||
| import static io.trino.operator.BasicWorkProcessorOperatorAdapter.createAdapterOperatorFactory; | ||
|
|
@@ -42,9 +44,10 @@ public static OperatorFactory createOperatorFactory( | |
| int n, | ||
| List<Integer> sortChannels, | ||
| List<SortOrder> sortOrders, | ||
| TypeOperators typeOperators) | ||
| TypeOperators typeOperators, | ||
| Optional<DataSize> maxPartialMemory) | ||
| { | ||
| return createAdapterOperatorFactory(new Factory(operatorId, planNodeId, types, n, sortChannels, sortOrders, typeOperators)); | ||
| return createAdapterOperatorFactory(new Factory(operatorId, planNodeId, types, n, sortChannels, sortOrders, typeOperators, maxPartialMemory)); | ||
| } | ||
|
|
||
| private static class Factory | ||
|
|
@@ -57,6 +60,7 @@ private static class Factory | |
| private final List<Integer> sortChannels; | ||
| private final List<SortOrder> sortOrders; | ||
| private final TypeOperators typeOperators; | ||
| private final Optional<DataSize> maxPartialMemory; | ||
| private boolean closed; | ||
|
|
||
| private Factory( | ||
|
|
@@ -66,7 +70,8 @@ private Factory( | |
| int n, | ||
| List<Integer> sortChannels, | ||
| List<SortOrder> sortOrders, | ||
| TypeOperators typeOperators) | ||
| TypeOperators typeOperators, | ||
| Optional<DataSize> maxPartialMemory) | ||
| { | ||
| this.operatorId = operatorId; | ||
| this.planNodeId = requireNonNull(planNodeId, "planNodeId is null"); | ||
|
|
@@ -75,6 +80,7 @@ private Factory( | |
| this.sortChannels = ImmutableList.copyOf(requireNonNull(sortChannels, "sortChannels is null")); | ||
| this.sortOrders = ImmutableList.copyOf(requireNonNull(sortOrders, "sortOrders is null")); | ||
| this.typeOperators = typeOperators; | ||
| this.maxPartialMemory = requireNonNull(maxPartialMemory, "maxPartialMemory is null"); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -90,7 +96,8 @@ public WorkProcessorOperator create( | |
| n, | ||
| sortChannels, | ||
| sortOrders, | ||
| typeOperators); | ||
| typeOperators, | ||
| maxPartialMemory); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -120,11 +127,10 @@ public void close() | |
| @Override | ||
| public Factory duplicate() | ||
| { | ||
| return new Factory(operatorId, planNodeId, sourceTypes, n, sortChannels, sortOrders, typeOperators); | ||
| return new Factory(operatorId, planNodeId, sourceTypes, n, sortChannels, sortOrders, typeOperators, maxPartialMemory); | ||
| } | ||
| } | ||
|
|
||
| private final TopNProcessor topNProcessor; | ||
| private final WorkProcessor<Page> pages; | ||
|
|
||
| private TopNOperator( | ||
|
|
@@ -134,21 +140,25 @@ private TopNOperator( | |
| int n, | ||
| List<Integer> sortChannels, | ||
| List<SortOrder> sortOrders, | ||
| TypeOperators typeOperators) | ||
| TypeOperators typeOperators, | ||
| Optional<DataSize> maxPartialMemory) | ||
| { | ||
| this.topNProcessor = new TopNProcessor( | ||
| requireNonNull(memoryTrackingContext, "memoryTrackingContext is null").aggregateUserMemoryContext(), | ||
| types, | ||
| n, | ||
| sortChannels, | ||
| sortOrders, | ||
| typeOperators); | ||
| requireNonNull(memoryTrackingContext, "memoryTrackingContext is null"); | ||
|
|
||
| if (n == 0) { | ||
| pages = WorkProcessor.of(); | ||
| } | ||
| else { | ||
| pages = sourcePages.transform(new TopNPages()); | ||
| TopNProcessor topNProcessor = new TopNProcessor( | ||
| memoryTrackingContext.aggregateUserMemoryContext(), | ||
| types, | ||
| n, | ||
| sortChannels, | ||
| sortOrders, | ||
| typeOperators); | ||
| long maxPartialMemoryWithDefaultValueIfAbsent = requireNonNull(maxPartialMemory, "maxPartialMemory is null") | ||
| .map(DataSize::toBytes).orElse(Long.MAX_VALUE); | ||
| pages = sourcePages.transform(new TopNPages(topNProcessor, maxPartialMemoryWithDefaultValueIfAbsent)); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -158,18 +168,44 @@ public WorkProcessor<Page> getOutputPages() | |
| return pages; | ||
| } | ||
|
|
||
| private class TopNPages | ||
| private static class TopNPages | ||
| implements WorkProcessor.Transformation<Page, Page> | ||
| { | ||
| private final TopNProcessor topNProcessor; | ||
| private final long maxPartialMemory; | ||
|
|
||
| private boolean isPartialFlushing; | ||
|
|
||
| private TopNPages(TopNProcessor topNProcessor, long maxPartialMemory) | ||
| { | ||
| this.topNProcessor = topNProcessor; | ||
| this.maxPartialMemory = maxPartialMemory; | ||
| } | ||
|
|
||
| private boolean isBuilderFull() | ||
| { | ||
| return topNProcessor.getEstimatedSizeInBytes() >= maxPartialMemory; | ||
| } | ||
|
|
||
| private void addPage(Page page) | ||
| { | ||
| checkState(!isPartialFlushing, "TopN buffer is already full"); | ||
| topNProcessor.addInput(page); | ||
| if (isBuilderFull()) { | ||
| isPartialFlushing = true; | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public TransformationState<Page> process(Page inputPage) | ||
| { | ||
| if (inputPage != null) { | ||
| topNProcessor.addInput(inputPage); | ||
| return TransformationState.needsMoreData(); | ||
| if (!isPartialFlushing && inputPage != null) { | ||
| addPage(inputPage); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: you can inline
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would prefer to keep it as a separate method. |
||
| if (!isPartialFlushing) { | ||
| return TransformationState.needsMoreData(); | ||
| } | ||
| } | ||
|
JunhyungSong marked this conversation as resolved.
Outdated
|
||
|
|
||
| // no more input, return results | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. add a comment: |
||
| Page page = null; | ||
| while (page == null && !topNProcessor.noMoreOutput()) { | ||
| page = topNProcessor.getOutput(); | ||
|
|
@@ -179,6 +215,14 @@ public TransformationState<Page> process(Page inputPage) | |
| return TransformationState.ofResult(page, false); | ||
| } | ||
|
|
||
| if (isPartialFlushing) { | ||
| checkState(inputPage != null, "inputPage that triggered partial flushing is null"); | ||
| isPartialFlushing = false; | ||
| // resume receiving pages | ||
| return TransformationState.needsMoreData(); | ||
| } | ||
|
|
||
| // all input pages are consumed | ||
| return TransformationState.finished(); | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ | |
|
|
||
| import java.util.Iterator; | ||
| import java.util.List; | ||
| import java.util.function.Supplier; | ||
|
|
||
| import static com.google.common.base.Preconditions.checkArgument; | ||
| import static com.google.common.base.Verify.verify; | ||
|
|
@@ -35,39 +36,47 @@ | |
| */ | ||
| public class TopNProcessor | ||
| { | ||
| private final LocalMemoryContext localUserMemoryContext; | ||
| private final LocalMemoryContext localMemoryContext; | ||
| @Nullable | ||
| private final Supplier<GroupedTopNBuilder> topNBuilderSupplier; | ||
|
JunhyungSong marked this conversation as resolved.
Outdated
|
||
|
|
||
| @Nullable | ||
| private GroupedTopNBuilder topNBuilder; | ||
| @Nullable | ||
| private Iterator<Page> outputIterator; | ||
|
|
||
| public TopNProcessor( | ||
| AggregatedMemoryContext aggregatedMemoryContext, | ||
| List<Type> types, | ||
| int n, | ||
| List<Integer> sortChannels, | ||
| List<SortOrder> sortOrders, TypeOperators typeOperators) | ||
| List<SortOrder> sortOrders, | ||
| TypeOperators typeOperators) | ||
| { | ||
| requireNonNull(aggregatedMemoryContext, "aggregatedMemoryContext is null"); | ||
| this.localUserMemoryContext = aggregatedMemoryContext.newLocalMemoryContext(TopNProcessor.class.getSimpleName()); | ||
| this.localMemoryContext = aggregatedMemoryContext.newLocalMemoryContext(TopNProcessor.class.getSimpleName()); | ||
| checkArgument(n >= 0, "n must be positive"); | ||
|
|
||
| if (n == 0) { | ||
| outputIterator = emptyIterator(); | ||
| topNBuilderSupplier = null; | ||
| } | ||
| else { | ||
| topNBuilder = new GroupedTopNRowNumberBuilder( | ||
| GroupByHash noChannelGroupByHash = new NoChannelGroupByHash(); | ||
| PageWithPositionComparator comparator = new SimplePageWithPositionComparator(types, sortChannels, sortOrders, typeOperators); | ||
| topNBuilderSupplier = () -> new GroupedTopNRowNumberBuilder( | ||
| types, | ||
| new SimplePageWithPositionComparator(types, sortChannels, sortOrders, typeOperators), | ||
| comparator, | ||
| n, | ||
| false, | ||
| new NoChannelGroupByHash()); | ||
| noChannelGroupByHash); | ||
| } | ||
| } | ||
|
|
||
| public void addInput(Page page) | ||
| { | ||
| requireNonNull(topNBuilder, "topNBuilder is null"); | ||
| if (topNBuilder == null) { | ||
| topNBuilder = requireNonNull(topNBuilderSupplier.get(), "topNBuilderSupplier is null"); | ||
| } | ||
| boolean done = topNBuilder.processPage(requireNonNull(page, "page is null")).process(); | ||
| // there is no grouping so work will always be done | ||
| verify(done); | ||
|
|
@@ -78,28 +87,33 @@ public Page getOutput() | |
| { | ||
| if (outputIterator == null) { | ||
| // start flushing | ||
| outputIterator = topNBuilder.buildResult(); | ||
| outputIterator = topNBuilder == null ? emptyIterator() : topNBuilder.buildResult(); | ||
| } | ||
|
|
||
| Page output = null; | ||
| if (outputIterator.hasNext()) { | ||
| output = outputIterator.next(); | ||
| } | ||
| else { | ||
| outputIterator = emptyIterator(); | ||
| outputIterator = null; | ||
| topNBuilder = null; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After you build the result on line 90, it's not clear that any other side effects happening to topNBuilder will be reflected in the output (even though this may not occur in practice). It just looks more correct to an average programmer to clear the topNBuilder immediately after calling buildResult, so we should do that (unless it makes this code incorrect somehow).
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Even though buildResult() has been called, topNBuilder is still retained by outputIterator, so it needs to be memory-accounted until outputIterator is nullified. This is why other operators such as HashAggregationOperator and TopNRankingOperator keep their builder until flushing is completed. |
||
| } | ||
| updateMemoryReservation(); | ||
| return output; | ||
| } | ||
|
|
||
| public boolean noMoreOutput() | ||
| { | ||
| return outputIterator != null && !outputIterator.hasNext(); | ||
| return topNBuilder == null; | ||
| } | ||
|
|
||
| public long getEstimatedSizeInBytes() | ||
| { | ||
| return topNBuilder == null ? 0 : topNBuilder.getEstimatedSizeInBytes(); | ||
| } | ||
|
|
||
| private void updateMemoryReservation() | ||
| { | ||
| requireNonNull(topNBuilder, "topNBuilder is null"); | ||
| localUserMemoryContext.setBytes(topNBuilder.getEstimatedSizeInBytes()); | ||
| localMemoryContext.setBytes(getEstimatedSizeInBytes()); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can manage flushing entirely within the
`process` method, e.g.: Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The problem is that the process method will never be called until the finish method is called in non-late-materialization mode. Even in late-materialization mode, it will keep sending input pages even if TopNOperator is in partial flushing mode.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should just make
`TopNOperator` a `WorkProcessorOperator` (as, for example, `FilterAndProjectOperator`). Please run
`BenchmarkTopNOperator` afterwards. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I implemented a new WorkProcessor.Process for TopNOperator (similar to FilterAndProjectOperator). BenchmarkTopNOperator showed almost no discrepancy.