-
Notifications
You must be signed in to change notification settings - Fork 29.1k
[SPARK-29277][SQL] Add early DSv2 filter and projection pushdown #25955
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7d7c914
e7beb5b
fb7f54d
621224a
4220723
c223e05
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -681,10 +681,18 @@ class Analyzer( | |
| .map(v2Relation => i.copy(table = v2Relation)) | ||
| .getOrElse(i) | ||
|
|
||
| case desc @ DescribeTable(u: UnresolvedV2Relation, _) => | ||
| CatalogV2Util.loadRelation(u.catalog, u.tableName) | ||
| .map(rel => desc.copy(table = rel)) | ||
| .getOrElse(desc) | ||
|
|
||
| case alter @ AlterTable(_, _, u: UnresolvedV2Relation, _) => | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tested out a trait that worked for all of the plans that need to be resolved here, but the code was longer with the trait and implementations. If we need it later because we have more cases in this rule, it should be easy to add. I don't think we need it right now. |
||
| CatalogV2Util.loadRelation(u.catalog, u.tableName) | ||
| .map(rel => alter.copy(table = rel)) | ||
| .getOrElse(alter) | ||
|
|
||
| case u: UnresolvedV2Relation => | ||
| CatalogV2Util.loadTable(u.catalog, u.tableName).map { table => | ||
| DataSourceV2Relation.create(table) | ||
| }.getOrElse(u) | ||
| CatalogV2Util.loadRelation(u.catalog, u.tableName).getOrElse(u) | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,10 +20,13 @@ package org.apache.spark.sql.execution | |
| import org.apache.spark.sql.ExperimentalMethods | ||
| import org.apache.spark.sql.catalyst.catalog.SessionCatalog | ||
| import org.apache.spark.sql.catalyst.optimizer._ | ||
| import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan | ||
| import org.apache.spark.sql.catalyst.rules.Rule | ||
| import org.apache.spark.sql.connector.catalog.CatalogManager | ||
| import org.apache.spark.sql.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning} | ||
| import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions | ||
| import org.apache.spark.sql.execution.datasources.SchemaPruning | ||
| import org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown | ||
| import org.apache.spark.sql.execution.python.{ExtractGroupingPythonUDFFromAggregate, ExtractPythonUDFFromAggregate, ExtractPythonUDFs} | ||
|
|
||
| class SparkOptimizer( | ||
|
|
@@ -32,10 +35,12 @@ class SparkOptimizer( | |
| experimentalMethods: ExperimentalMethods) | ||
| extends Optimizer(catalogManager) { | ||
|
|
||
| override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = | ||
| // TODO: move SchemaPruning into catalyst | ||
| SchemaPruning :: PruneFileSourcePartitions :: V2ScanRelationPushDown :: Nil | ||
|
|
||
| override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+ | ||
| Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ | ||
| Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+ | ||
| Batch("Schema Pruning", Once, SchemaPruning) :+ | ||
| Batch("PartitionPruning", Once, | ||
| PartitionPruning, | ||
| OptimizeSubqueries) :+ | ||
|
|
@@ -64,7 +69,8 @@ class SparkOptimizer( | |
| override def nonExcludableRules: Seq[String] = super.nonExcludableRules :+ | ||
| ExtractPythonUDFFromJoinCondition.ruleName :+ | ||
| ExtractPythonUDFFromAggregate.ruleName :+ ExtractGroupingPythonUDFFromAggregate.ruleName :+ | ||
| ExtractPythonUDFs.ruleName | ||
| ExtractPythonUDFs.ruleName :+ | ||
| V2ScanRelationPushDown.ruleName | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Only |
||
|
|
||
| /** | ||
| * Optimization batches that are executed before the regular optimization batches (also before | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for incorporating these!