-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Fix column in parquet dereference pushdown #16377
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -79,7 +79,10 @@ public HiveParquetDereferencePushDown(HiveTransactionManager transactionManager, | |
| this.rowExpressionService = requireNonNull(rowExpressionService, "rowExpressionService is null"); | ||
| } | ||
|
|
||
| private static Map<RowExpression, Subfield> extractDereferences(ConnectorSession session, ExpressionOptimizer expressionOptimizer, Set<RowExpression> expressions) | ||
| private static Map<RowExpression, Subfield> extractDereferences( | ||
| Map<String, HiveColumnHandle> regularHiveColumnHandles, | ||
| ConnectorSession session, ExpressionOptimizer expressionOptimizer, | ||
| Set<RowExpression> expressions) | ||
| { | ||
| Set<RowExpression> dereferenceAndVariableExpressions = new HashSet<>(); | ||
| expressions.forEach(e -> e.accept(new ExtractDereferenceAndVariables(session, expressionOptimizer), dereferenceAndVariableExpressions)); | ||
|
|
@@ -90,7 +93,8 @@ private static Map<RowExpression, Subfield> extractDereferences(ConnectorSession | |
| .filter(expression -> expression instanceof SpecialFormExpression && ((SpecialFormExpression) expression).getForm() == DEREFERENCE) | ||
| .collect(Collectors.toList()); | ||
|
|
||
| return dereferences.stream().collect(toMap(identity(), dereference -> createNestedColumn(dereference, expressionOptimizer, session))); | ||
| return dereferences.stream().collect(toMap(identity(), dereference -> createNestedColumn( | ||
| regularHiveColumnHandles, dereference, expressionOptimizer, session))); | ||
| } | ||
|
|
||
| private static boolean prefixExists(RowExpression expression, Set<RowExpression> allExpressions) | ||
|
|
@@ -128,7 +132,9 @@ public Void visitVariableReference(VariableReferenceExpression reference, int[] | |
| return referenceCount[0] > 1; | ||
| } | ||
|
|
||
| private static Subfield createNestedColumn(RowExpression rowExpression, ExpressionOptimizer expressionOptimizer, ConnectorSession session) | ||
| private static Subfield createNestedColumn(Map<String, HiveColumnHandle> regularHiveColumnHandles, | ||
| RowExpression rowExpression, ExpressionOptimizer expressionOptimizer, | ||
| ConnectorSession session) | ||
| { | ||
| if (!(rowExpression instanceof SpecialFormExpression) || ((SpecialFormExpression) rowExpression).getForm() != DEREFERENCE) { | ||
| throw new IllegalArgumentException("expecting SpecialFormExpression(DEREFERENCE), but got: " + rowExpression); | ||
|
|
@@ -138,7 +144,11 @@ private static Subfield createNestedColumn(RowExpression rowExpression, Expressi | |
| while (true) { | ||
| if (rowExpression instanceof VariableReferenceExpression) { | ||
| Collections.reverse(elements); | ||
| return new Subfield(((VariableReferenceExpression) rowExpression).getName(), unmodifiableList(elements)); | ||
| String name = ((VariableReferenceExpression) rowExpression).getName(); | ||
| HiveColumnHandle handle = regularHiveColumnHandles.get(name); | ||
| checkArgument(handle != null, "Missing Hive column handle: " + name); | ||
| String originalColumnName = regularHiveColumnHandles.get(name).getName(); | ||
| return new Subfield(originalColumnName, unmodifiableList(elements)); | ||
| } | ||
|
|
||
| if (rowExpression instanceof SpecialFormExpression && ((SpecialFormExpression) rowExpression).getForm() == DEREFERENCE) { | ||
|
|
@@ -329,18 +339,22 @@ public PlanNode visitProject(ProjectNode project, Void context) | |
| if (!isParquetDereferenceEnabled(session, tableScan.getTable())) { | ||
| return visitPlan(project, context); | ||
| } | ||
| Map<String, HiveColumnHandle> regularHiveColumnHandles = new HashMap<>(); | ||
| regularHiveColumnHandles.putAll(tableScan.getAssignments().entrySet().stream() | ||
| .collect(toMap(e -> e.getKey().getName(), e -> (HiveColumnHandle) e.getValue()))); | ||
| regularHiveColumnHandles.putAll(tableScan.getAssignments().values().stream() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need this mapping (columnName in HiveColumnHandle -> HiveColumnHandle)? Expressions should be referring to the name in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, are you referring to the second |
||
| .map(columnHandle -> (HiveColumnHandle) columnHandle) | ||
| .collect(toMap(HiveColumnHandle::getName, identity()))); | ||
|
|
||
| Map<RowExpression, Subfield> dereferenceToNestedColumnMap = extractDereferences( | ||
| regularHiveColumnHandles, | ||
| session, | ||
| rowExpressionService.getExpressionOptimizer(), | ||
| new HashSet<>(project.getAssignments().getExpressions())); | ||
| if (dereferenceToNestedColumnMap.isEmpty()) { | ||
| return visitPlan(project, context); | ||
| } | ||
|
|
||
| Map<String, HiveColumnHandle> regularHiveColumnHandles = tableScan.getAssignments().entrySet().stream() | ||
| .collect(toMap(e -> e.getKey().getName(), e -> (HiveColumnHandle) e.getValue())); | ||
|
|
||
| List<VariableReferenceExpression> newOutputVariables = new ArrayList<>(tableScan.getOutputVariables()); | ||
| Map<VariableReferenceExpression, ColumnHandle> newAssignments = new HashMap<>(tableScan.getAssignments()); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we add a check to make sure `regularHiveColumnHandles.get(name)` returns non-null?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a Preconditions check