[SPARK-19080][SQL] simplify data source analysis #16269
Changes from all commits: c55a1f9, 3336699, cede3c9, dcea3a6, 19e4924, 4b68c16, 48535aa, 55a1db3, 6274f2b, 9742f78
**`CheckAnalysis.scala`**

```diff
@@ -376,28 +376,6 @@ trait CheckAnalysis extends PredicateHelper {
                |Conflicting attributes: ${conflictingAttributes.mkString(",")}
              """.stripMargin)

-          case InsertIntoTable(t, _, _, _, _)
-            if !t.isInstanceOf[LeafNode] ||
-              t.isInstanceOf[Range] ||
-              t == OneRowRelation ||
-              t.isInstanceOf[LocalRelation] =>
-            failAnalysis(s"Inserting into an RDD-based table is not allowed.")
-
-          case i @ InsertIntoTable(table, partitions, query, _, _) =>
-            val numStaticPartitions = partitions.values.count(_.isDefined)
-            if (table.output.size != (query.output.size + numStaticPartitions)) {
-              failAnalysis(
-                s"$table requires that the data to be inserted have the same number of " +
-                  s"columns as the target table: target table has ${table.output.size} " +
-                  s"column(s) but the inserted data has " +
-                  s"${query.output.size + numStaticPartitions} column(s), including " +
-                  s"$numStaticPartitions partition column(s) having constant value(s).")
-            }
-
-          case o if !o.resolved =>
-            failAnalysis(
-              s"unresolved operator ${operator.simpleString}")
-
           case o if o.expressions.exists(!_.deterministic) &&
             !o.isInstanceOf[Project] && !o.isInstanceOf[Filter] &&
             !o.isInstanceOf[Aggregate] && !o.isInstanceOf[Window] =>
```
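For context on the second removed case: the inserted query's columns plus the static partition values must add up to the target table's column count. A small worked sketch of that arithmetic, with a hypothetical table and column names (not part of the PR):

```scala
// Hypothetical example: INSERT INTO tbl PARTITION (c = 1) SELECT x, y FROM src
// against a three-column target table tbl(a, b, c).
val partitions: Map[String, Option[String]] = Map("c" -> Some("1"))
val numStaticPartitions = partitions.values.count(_.isDefined) // 1 static value
val targetTableColumns = 3   // plays the role of table.output.size
val insertedQueryColumns = 2 // plays the role of query.output.size (x and y)
// The removed check accepted the insert only when these line up:
assert(targetTableColumns == insertedQueryColumns + numStaticPartitions)
```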
```diff
@@ -413,6 +391,10 @@ trait CheckAnalysis extends PredicateHelper {
         }
     }
     extendedCheckRules.foreach(_(plan))
+    plan.foreachUp {
+      case o if !o.resolved => failAnalysis(s"unresolved operator ${o.simpleString}")
+      case _ =>
+    }

     plan.foreach(_.setAnalyzed())
   }
```

**Member:** This movement is great, since …
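`foreachUp` applies its function to a node's children before the node itself, so the new check reports the deepest unresolved operator rather than the root. A minimal, self-contained sketch of that traversal order with toy types (this is not Spark's `TreeNode` API):

```scala
// Toy tree demonstrating bottom-up (children-first) traversal order.
sealed trait Node {
  def name: String
  def children: Seq[Node]
  def foreachUp(f: Node => Unit): Unit = {
    children.foreach(_.foreachUp(f)) // recurse into children first
    f(this)                          // then visit this node
  }
}
case class Leaf(name: String) extends Node { val children: Seq[Node] = Nil }
case class Op(name: String, children: Seq[Node]) extends Node

val plan = Op("Project", Seq(Op("Filter", Seq(Leaf("Relation")))))
plan.foreachUp(n => println(n.name)) // prints: Relation, Filter, Project
```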
**`basicLogicalOperators.scala`**

```diff
@@ -363,7 +363,8 @@ case class BroadcastHint(child: LogicalPlan) extends UnaryNode {
 }

 /**
- * Insert some data into a table.
+ * Insert some data into a table. Note that this plan is unresolved and has to be replaced by the
+ * concrete implementations during analysis.
  *
  * @param table the logical plan representing the table. In the future this should be a
  *              [[org.apache.spark.sql.catalyst.catalog.CatalogTable]] once we converge Hive tables
```

**Member:** To other reviewers, …
```diff
@@ -374,25 +375,24 @@ case class BroadcastHint(child: LogicalPlan) extends UnaryNode {
  *                  Map('a' -> Some('1'), 'b' -> Some('2')),
  *                  and `INSERT INTO tbl PARTITION (a=1, b) AS ...`
  *                  would have Map('a' -> Some('1'), 'b' -> None).
- * @param child the logical plan representing data to write to.
+ * @param query the logical plan representing data to write to.
  * @param overwrite overwrite existing table or partitions.
  * @param ifNotExists If true, only write if the table or partition does not exist.
  */
 case class InsertIntoTable(
     table: LogicalPlan,
     partition: Map[String, Option[String]],
-    child: LogicalPlan,
+    query: LogicalPlan,
     overwrite: Boolean,
     ifNotExists: Boolean)
   extends LogicalPlan {

-  override def children: Seq[LogicalPlan] = child :: Nil
-  override def output: Seq[Attribute] = Seq.empty
-
   assert(overwrite || !ifNotExists)
   assert(partition.values.forall(_.nonEmpty) || !ifNotExists)

-  override lazy val resolved: Boolean = childrenResolved && table.resolved
+  // We don't want `table` in children as sometimes we don't want to transform it.
+  override def children: Seq[LogicalPlan] = query :: Nil
+  override def output: Seq[Attribute] = Seq.empty
+  override lazy val resolved: Boolean = false
 }

 /**
```

**Member:** @cloud-fan After this change, we are unable to reach the check in …

**Contributor (Author):** now we will resolve …
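A short sketch of the `partition` encodings described in the scaladoc above, with the two class-body assertions rewritten as a plain predicate (standalone Scala, not Spark code):

```scala
// INSERT INTO tbl PARTITION (a=1, b=2) ...  -> all partition values static
val allStatic: Map[String, Option[String]] = Map("a" -> Some("1"), "b" -> Some("2"))
// INSERT INTO tbl PARTITION (a=1, b) ...    -> b is a dynamic partition
val mixed: Map[String, Option[String]] = Map("a" -> Some("1"), "b" -> None)

// The two assertions, read as implications: IF NOT EXISTS requires overwrite
// semantics, and it requires every partition value to be static.
def assertionsHold(
    partition: Map[String, Option[String]],
    overwrite: Boolean,
    ifNotExists: Boolean): Boolean =
  (overwrite || !ifNotExists) && (partition.values.forall(_.nonEmpty) || !ifNotExists)

assert(assertionsHold(allStatic, overwrite = true, ifNotExists = true)) // accepted
assert(!assertionsHold(mixed, overwrite = true, ifNotExists = true))    // dynamic partition + IF NOT EXISTS rejected
```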
**`DataSourceStrategy.scala`**

```diff
@@ -45,7 +45,8 @@ import org.apache.spark.unsafe.types.UTF8String
  * Replaces generic operations with specific variants that are designed to work with Spark
  * SQL Data Sources.
  *
- * Note that, this rule must be run after [[PreprocessTableInsertion]].
+ * Note that, this rule must be run after `PreprocessTableCreation` and
+ * `PreprocessTableInsertion`.
  */
 case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] {
```

**Member:** The same updates are needed here.
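The ordering constraint exists because analyzer rules run in sequence within a batch, and this rule assumes its input has already been normalized by the preprocessing rules. A toy sketch of that dependency (stand-in types and names, not the Analyzer's actual API):

```scala
// Rules are plan-to-plan functions; a batch applies them left to right.
type LogicalPlan = String // stand-in for a real plan type
type AnalyzerRule = LogicalPlan => LogicalPlan

def runBatch(rules: Seq[AnalyzerRule], plan: LogicalPlan): LogicalPlan =
  rules.foldLeft(plan)((p, rule) => rule(p))

val preprocess: AnalyzerRule = p => s"preprocessed($p)"
val dataSourceAnalysis: AnalyzerRule = p =>
  if (p.startsWith("preprocessed")) s"planned($p)"
  else sys.error("DataSourceAnalysis ran before preprocessing")

val out = runBatch(Seq(preprocess, dataSourceAnalysis), "INSERT ...")
// out == "planned(preprocessed(INSERT ...))"
```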
```diff
@@ -130,6 +131,17 @@ case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] {
   }

   override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
+    case CreateTable(tableDesc, mode, None) if DDLUtils.isDatasourceTable(tableDesc) =>
+      CreateDataSourceTableCommand(tableDesc, ignoreIfExists = mode == SaveMode.Ignore)
+
+    case CreateTable(tableDesc, mode, Some(query))
+        if query.resolved && DDLUtils.isDatasourceTable(tableDesc) =>
+      CreateDataSourceTableAsSelectCommand(tableDesc, mode, query)
+
+    case InsertIntoTable(l @ LogicalRelation(_: InsertableRelation, _, _),
+        parts, query, overwrite, false) if parts.isEmpty =>
+      InsertIntoDataSourceCommand(l, query, overwrite)
+
     case InsertIntoTable(
         l @ LogicalRelation(t: HadoopFsRelation, _, table), parts, query, overwrite, false) =>
       // If the InsertIntoTable command is for a partitioned HadoopFsRelation and
```
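The added cases follow the usual Catalyst rewrite shape: match a generic, declarative node and replace it with a concrete executable command. A self-contained sketch of that shape with toy plan nodes (all names hypothetical, not Spark's classes):

```scala
// Toy nodes mimicking the generic-node-to-command rewrite above.
sealed trait Plan
case class Scan(table: String) extends Plan
case class GenericCreateTable(name: String, query: Option[Plan]) extends Plan
case class CreateTableCommand(name: String, ignoreIfExists: Boolean) extends Plan
case class CreateTableAsSelectCommand(name: String, query: Plan) extends Plan

def rewrite(plan: Plan): Plan = plan match {
  case GenericCreateTable(name, None) =>
    CreateTableCommand(name, ignoreIfExists = false)
  case GenericCreateTable(name, Some(query)) =>
    CreateTableAsSelectCommand(name, query)
  case other => other // anything already concrete passes through
}

// rewrite(GenericCreateTable("t", Some(Scan("src"))))
//   == CreateTableAsSelectCommand("t", Scan("src"))
```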
```diff
@@ -273,10 +285,6 @@ object DataSourceStrategy extends Strategy with Logging {
         Map.empty,
         None) :: Nil

-    case InsertIntoTable(l @ LogicalRelation(t: InsertableRelation, _, _),
-      part, query, overwrite, false) if part.isEmpty =>
-      ExecutedCommandExec(InsertIntoDataSourceCommand(l, query, overwrite)) :: Nil
-
     case _ => Nil
   }
```
**Reviewer comment:** To other reviewers: this is moved to `PreprocessTableInsertion`. After the above two moves, `def checkAnalysis(plan: LogicalPlan)` no longer contains any DDL/DML error handling, which makes it cleaner.