-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Apply info column filters during split generation #23411
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,9 @@ | |
| import java.util.OptionalInt; | ||
|
|
||
| import static com.facebook.presto.hive.BlockLocation.fromHiveBlockLocations; | ||
| import static com.facebook.presto.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_INDEX; | ||
| import static com.facebook.presto.hive.HiveColumnHandle.FILE_SIZE_COLUMN_INDEX; | ||
| import static com.facebook.presto.hive.HiveColumnHandle.PATH_COLUMN_INDEX; | ||
| import static com.facebook.presto.hive.HiveUtil.isSelectSplittable; | ||
| import static com.facebook.presto.hive.HiveUtil.isSplittable; | ||
| import static com.facebook.presto.hive.util.CustomSplitConversionUtils.extractCustomSplitInfo; | ||
|
|
@@ -52,7 +55,7 @@ public class InternalHiveSplitFactory | |
| { | ||
| private final FileSystem fileSystem; | ||
| private final InputFormat<?, ?> inputFormat; | ||
| private final Optional<Domain> pathDomain; | ||
| private final Map<Integer, Domain> infoColumnConstraints; | ||
| private final NodeSelectionStrategy nodeSelectionStrategy; | ||
| private final boolean s3SelectPushdownEnabled; | ||
| private final HiveSplitPartitionInfo partitionInfo; | ||
|
|
@@ -63,7 +66,7 @@ public class InternalHiveSplitFactory | |
| public InternalHiveSplitFactory( | ||
| FileSystem fileSystem, | ||
| InputFormat<?, ?> inputFormat, | ||
| Optional<Domain> pathDomain, | ||
| Map<Integer, Domain> infoColumnConstraints, | ||
| NodeSelectionStrategy nodeSelectionStrategy, | ||
| DataSize minimumTargetSplitSize, | ||
| boolean s3SelectPushdownEnabled, | ||
|
|
@@ -73,7 +76,7 @@ public InternalHiveSplitFactory( | |
| { | ||
| this.fileSystem = requireNonNull(fileSystem, "fileSystem is null"); | ||
| this.inputFormat = requireNonNull(inputFormat, "inputFormat is null"); | ||
| this.pathDomain = requireNonNull(pathDomain, "pathDomain is null"); | ||
| this.infoColumnConstraints = requireNonNull(infoColumnConstraints, "infoColumnConstraints is null"); | ||
| this.nodeSelectionStrategy = requireNonNull(nodeSelectionStrategy, "nodeSelectionStrategy is null"); | ||
| this.s3SelectPushdownEnabled = s3SelectPushdownEnabled; | ||
| this.partitionInfo = partitionInfo; | ||
|
|
@@ -147,7 +150,8 @@ private Optional<InternalHiveSplit> createInternalHiveSplit( | |
| Map<String, String> customSplitInfo) | ||
| { | ||
| String pathString = path.toString(); | ||
| if (!pathMatchesPredicate(pathDomain, pathString)) { | ||
|
|
||
| if (!infoColumnsMatchPredicates(infoColumnConstraints, pathString, fileSize, fileModificationTime)) { | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
|
|
@@ -252,4 +256,32 @@ private static boolean pathMatchesPredicate(Optional<Domain> pathDomain, String | |
|
|
||
| return pathDomain.get().includesNullableValue(utf8Slice(path)); | ||
| } | ||
|
|
||
| private static boolean infoColumnsMatchPredicates(Map<Integer, Domain> constraints, | ||
| String path, | ||
| long fileSize, | ||
| long fileModificationTime) | ||
| { | ||
| if (constraints.isEmpty()) { | ||
| return true; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be false instead of true ? I'm not super familiar with this code. But when would constraints be empty ? Would be great to have a test for this situation.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Constraints would be empty when there are no filters on info columns in the query. We should return true, so the split will be scheduled to worker. Any simple SELECT query without filters or filters on non-info columns will already be testing this scenario. |
||
| } | ||
|
|
||
| boolean matches = true; | ||
|
|
||
| for (Map.Entry<Integer, Domain> constraint : constraints.entrySet()) { | ||
| switch (constraint.getKey()) { | ||
| case PATH_COLUMN_INDEX: | ||
| matches &= constraint.getValue().includesNullableValue(utf8Slice(path)); | ||
| break; | ||
| case FILE_SIZE_COLUMN_INDEX: | ||
| matches &= constraint.getValue().includesNullableValue(fileSize); | ||
| break; | ||
| case FILE_MODIFIED_TIME_COLUMN_INDEX: | ||
| matches &= constraint.getValue().includesNullableValue(fileModificationTime); | ||
| break; | ||
| } | ||
| } | ||
|
|
||
| return matches; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
infoColumn -> metadataColumn