-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Add streaming aggregation #17281
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add streaming aggregation #17281
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -155,6 +155,7 @@ public class HiveClientConfig | |
|
|
||
| private boolean s3SelectPushdownEnabled; | ||
| private int s3SelectPushdownMaxConnections = 500; | ||
| private boolean streamingAggregationEnabled; | ||
|
|
||
| private boolean isTemporaryStagingDirectoryEnabled = true; | ||
| private String temporaryStagingDirectoryPath = "/tmp/presto-${USER}"; | ||
|
|
@@ -206,6 +207,7 @@ public class HiveClientConfig | |
| private boolean userDefinedTypeEncodingEnabled; | ||
|
|
||
| private boolean columnIndexFilterEnabled; | ||
| private boolean fileSplittable = true; | ||
|
||
|
|
||
| @Min(0) | ||
| public int getMaxInitialSplits() | ||
|
|
@@ -1355,6 +1357,19 @@ public HiveClientConfig setS3SelectPushdownMaxConnections(int s3SelectPushdownMa | |
| return this; | ||
| } | ||
|
|
||
| public boolean isStreamingAggregationEnabled() | ||
| { | ||
| return streamingAggregationEnabled; | ||
| } | ||
|
|
||
| @Config("hive.streaming-aggregation-enabled") | ||
| @ConfigDescription("Enable streaming aggregation execution") | ||
| public HiveClientConfig setStreamingAggregationEnabled(boolean streamingAggregationEnabled) | ||
| { | ||
| this.streamingAggregationEnabled = streamingAggregationEnabled; | ||
| return this; | ||
| } | ||
|
|
||
| public boolean isTemporaryStagingDirectoryEnabled() | ||
| { | ||
| return isTemporaryStagingDirectoryEnabled; | ||
|
|
@@ -1758,4 +1773,17 @@ public HiveClientConfig setUseRecordPageSourceForCustomSplit(boolean useRecordPa | |
| this.useRecordPageSourceForCustomSplit = useRecordPageSourceForCustomSplit; | ||
| return this; | ||
| } | ||
|
|
||
| public boolean isFileSplittable() | ||
| { | ||
| return fileSplittable; | ||
| } | ||
|
|
||
| @Config("hive.file-splittable") | ||
| @ConfigDescription("By default, this value is true. Set to false to make a hive file un-splittable when coordinator schedules splits.") | ||
| public HiveClientConfig setFileSplittable(boolean fileSplittable) | ||
kewang1024 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| { | ||
| this.fileSplittable = fileSplittable; | ||
| return this; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,13 +63,15 @@ | |
| import com.facebook.presto.spi.Constraint; | ||
| import com.facebook.presto.spi.DiscretePredicates; | ||
| import com.facebook.presto.spi.InMemoryRecordSet; | ||
| import com.facebook.presto.spi.LocalProperty; | ||
| import com.facebook.presto.spi.MaterializedViewNotFoundException; | ||
| import com.facebook.presto.spi.MaterializedViewStatus; | ||
| import com.facebook.presto.spi.PrestoException; | ||
| import com.facebook.presto.spi.QueryId; | ||
| import com.facebook.presto.spi.RecordCursor; | ||
| import com.facebook.presto.spi.SchemaTableName; | ||
| import com.facebook.presto.spi.SchemaTablePrefix; | ||
| import com.facebook.presto.spi.SortingProperty; | ||
| import com.facebook.presto.spi.SystemTable; | ||
| import com.facebook.presto.spi.TableLayoutFilterCoverage; | ||
| import com.facebook.presto.spi.TableNotFoundException; | ||
|
|
@@ -222,6 +224,7 @@ | |
| import static com.facebook.presto.hive.HiveSessionProperties.isSortedWriteToTempPathEnabled; | ||
| import static com.facebook.presto.hive.HiveSessionProperties.isSortedWritingEnabled; | ||
| import static com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled; | ||
| import static com.facebook.presto.hive.HiveSessionProperties.isStreamingAggregationEnabled; | ||
| import static com.facebook.presto.hive.HiveSessionProperties.isUsePageFileForHiveUnsupportedType; | ||
| import static com.facebook.presto.hive.HiveSessionProperties.shouldCreateEmptyBucketFilesForTemporaryTable; | ||
| import static com.facebook.presto.hive.HiveStorageFormat.AVRO; | ||
|
|
@@ -2758,14 +2761,46 @@ public ConnectorTableLayout getTableLayout(ConnectorSession session, ConnectorTa | |
| predicate = createPredicate(partitionColumns, partitions); | ||
| } | ||
|
|
||
| // Expose ordering property of the table. | ||
| ImmutableList.Builder<LocalProperty<ColumnHandle>> localProperties = ImmutableList.builder(); | ||
| Optional<Set<ColumnHandle>> streamPartitionColumns = Optional.empty(); | ||
| if (table.getStorage().getBucketProperty().isPresent() && !table.getStorage().getBucketProperty().get().getSortedBy().isEmpty()) { | ||
| ImmutableSet.Builder<ColumnHandle> streamPartitionColumnsBuilder = ImmutableSet.builder(); | ||
|
|
||
| // streamPartitioningColumns is how we partition the data across splits. | ||
| // localProperty is how we partition the data within a split. | ||
| // 1. add partition columns to streamPartitionColumns | ||
| partitionColumns.forEach(streamPartitionColumnsBuilder::add); | ||
|
|
||
| // 2. add sorted columns to streamPartitionColumns and localProperties | ||
| HiveBucketProperty bucketProperty = table.getStorage().getBucketProperty().get(); | ||
| Map<String, ColumnHandle> columnHandles = hiveColumnHandles(table).stream() | ||
| .collect(toImmutableMap(HiveColumnHandle::getName, identity())); | ||
| bucketProperty.getSortedBy().forEach(sortingColumn -> { | ||
| ColumnHandle columnHandle = columnHandles.get(sortingColumn.getColumnName()); | ||
| localProperties.add(new SortingProperty<>(columnHandle, sortingColumn.getOrder().getSortOrder())); | ||
| streamPartitionColumnsBuilder.add(columnHandle); | ||
| }); | ||
|
|
||
| // We currently only set streamPartitionColumns when it enables streaming aggregation and also it's eligible to enable streaming aggregation | ||
| // 1. When the bucket columns are the same as the prefix of the sort columns | ||
| // 2. When all rows of the same value group are guaranteed to be in the same split. We disable splitting a file when isStreamingAggregationEnabled is true to make sure the property is guaranteed. | ||
| List<String> sortColumns = bucketProperty.getSortedBy().stream().map(SortingColumn::getColumnName).collect(toImmutableList()); | ||
| if (bucketProperty.getBucketedBy().size() <= sortColumns.size() | ||
| && bucketProperty.getBucketedBy().containsAll(sortColumns.subList(0, bucketProperty.getBucketedBy().size())) | ||
|
||
| && isStreamingAggregationEnabled(session)) { | ||
| streamPartitionColumns = Optional.of(streamPartitionColumnsBuilder.build()); | ||
| } | ||
| } | ||
|
|
||
| return new ConnectorTableLayout( | ||
| hiveLayoutHandle, | ||
| Optional.empty(), | ||
| predicate, | ||
| tablePartitioning, | ||
| Optional.empty(), | ||
| streamPartitionColumns, | ||
| discretePredicates, | ||
| ImmutableList.of(), | ||
| localProperties.build(), | ||
| Optional.of(hiveLayoutHandle.getRemainingPredicate())); | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.