Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,24 +30,29 @@ import org.apache.spark.sql.types._
*/
object NestedColumnAliasing {

def unapply(plan: LogicalPlan)
: Option[(Map[ExtractValue, Alias], Map[ExprId, Seq[Alias]])] = plan match {
def unapply(plan: LogicalPlan): Option[LogicalPlan] = plan match {
case Project(projectList, child)
if SQLConf.get.nestedSchemaPruningEnabled && canProjectPushThrough(child) =>
val exprCandidatesToPrune = projectList ++ child.expressions
getAliasSubMap(exprCandidatesToPrune, child.producedAttributes.toSeq)
getAliasSubMap(exprCandidatesToPrune, child.producedAttributes.toSeq).map {
case (nestedFieldToAlias, attrToAliases) =>
NestedColumnAliasing.replaceToAliases(plan, nestedFieldToAlias, attrToAliases)
}

case plan if SQLConf.get.nestedSchemaPruningEnabled && canPruneOn(plan) =>
val exprCandidatesToPrune = plan.expressions
getAliasSubMap(exprCandidatesToPrune, plan.producedAttributes.toSeq)
case p if SQLConf.get.nestedSchemaPruningEnabled && canPruneOn(p) =>
val exprCandidatesToPrune = p.expressions
getAliasSubMap(exprCandidatesToPrune, p.producedAttributes.toSeq).map {
case (nestedFieldToAlias, attrToAliases) =>
NestedColumnAliasing.replaceToAliases(p, nestedFieldToAlias, attrToAliases)
}

case _ => None
}

/**
* Replace nested columns with aliases so that unused nested columns can be pruned later.
*/
def replaceToAliases(
private def replaceToAliases(
plan: LogicalPlan,
nestedFieldToAlias: Map[ExtractValue, Alias],
attrToAliases: Map[ExprId, Seq[Alias]]): LogicalPlan = plan match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -649,8 +649,7 @@ object ColumnPruning extends Rule[LogicalPlan] {
// Can't prune the columns on LeafNode
case p @ Project(_, _: LeafNode) => p

case p @ NestedColumnAliasing(nestedFieldToAlias, attrToAliases) =>
NestedColumnAliasing.replaceToAliases(p, nestedFieldToAlias, attrToAliases)
case NestedColumnAliasing(p) => p

// for all other logical plans that inherit the output from their children
// Project over project is handled by the first case, skip it here.
Expand Down