-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-21180][SQL] Remove conf from stats functions since now we have conf in LogicalPlan #18391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,7 +31,6 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Attri | |
| import org.apache.spark.sql.catalyst.plans.logical._ | ||
| import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} | ||
| import org.apache.spark.sql.catalyst.util.quoteIdentifier | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types.StructType | ||
|
|
||
|
|
||
|
|
@@ -436,7 +435,7 @@ case class CatalogRelation( | |
| createTime = -1 | ||
| )) | ||
|
|
||
| override def computeStats(conf: SQLConf): Statistics = { | ||
| override def computeStats: Statistics = { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this file's
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, I overlooked.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! I checked the other interface file, there are several files with this name... |
||
| // For data source tables, we will create a `LogicalRelation` and won't call this method, for | ||
| // hive serde tables, we will always generate a statistics. | ||
| // TODO: unify the table stats generation. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,7 +21,6 @@ import org.apache.spark.sql.Row | |
| import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} | ||
| import org.apache.spark.sql.catalyst.analysis | ||
| import org.apache.spark.sql.catalyst.expressions.{Attribute, Literal} | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types.{StructField, StructType} | ||
|
|
||
| object LocalRelation { | ||
|
|
@@ -67,7 +66,7 @@ case class LocalRelation(output: Seq[Attribute], data: Seq[InternalRow] = Nil) | |
| } | ||
| } | ||
|
|
||
| override def computeStats(conf: SQLConf): Statistics = | ||
| override def computeStats: Statistics = | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we remove
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I checked all modified files and removed unused imports. Thanks! |
||
| Statistics(sizeInBytes = | ||
| output.map(n => BigInt(n.dataType.defaultSize)).sum * data.length) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,6 @@ import org.apache.spark.sql.catalyst.analysis._ | |
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.plans.QueryPlan | ||
| import org.apache.spark.sql.catalyst.trees.CurrentOrigin | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.types.StructType | ||
|
|
||
|
|
||
|
|
@@ -90,8 +89,8 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with QueryPlanConstrai | |
| * first time. If the configuration changes, the cache can be invalidated by calling | ||
| * [[invalidateStatsCache()]]. | ||
| */ | ||
| final def stats(conf: SQLConf): Statistics = statsCache.getOrElse { | ||
| statsCache = Some(computeStats(conf)) | ||
| final def stats: Statistics = statsCache.getOrElse { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
| statsCache = Some(computeStats) | ||
| statsCache.get | ||
| } | ||
|
|
||
|
|
@@ -108,11 +107,11 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with QueryPlanConstrai | |
| * | ||
| * [[LeafNode]]s must override this. | ||
| */ | ||
| protected def computeStats(conf: SQLConf): Statistics = { | ||
| protected def computeStats: Statistics = { | ||
| if (children.isEmpty) { | ||
| throw new UnsupportedOperationException(s"LeafNode $nodeName must implement statistics.") | ||
| } | ||
| Statistics(sizeInBytes = children.map(_.stats(conf).sizeInBytes).product) | ||
| Statistics(sizeInBytes = children.map(_.stats.sizeInBytes).product) | ||
| } | ||
|
|
||
| override def verboseStringWithSuffix: String = { | ||
|
|
@@ -333,21 +332,21 @@ abstract class UnaryNode extends LogicalPlan { | |
|
|
||
| override protected def validConstraints: Set[Expression] = child.constraints | ||
|
|
||
| override def computeStats(conf: SQLConf): Statistics = { | ||
| override def computeStats: Statistics = { | ||
| // There should be some overhead in Row object, the size should not be zero when there is | ||
| // no columns, this help to prevent divide-by-zero error. | ||
| val childRowSize = child.output.map(_.dataType.defaultSize).sum + 8 | ||
| val outputRowSize = output.map(_.dataType.defaultSize).sum + 8 | ||
| // Assume there will be the same number of rows as child has. | ||
| var sizeInBytes = (child.stats(conf).sizeInBytes * outputRowSize) / childRowSize | ||
| var sizeInBytes = (child.stats.sizeInBytes * outputRowSize) / childRowSize | ||
| if (sizeInBytes == 0) { | ||
| // sizeInBytes can't be zero, or sizeInBytes of BinaryNode will also be zero | ||
| // (product of children). | ||
| sizeInBytes = 1 | ||
| } | ||
|
|
||
| // Don't propagate rowCount and attributeStats, since they are not estimated here. | ||
| Statistics(sizeInBytes = sizeInBytes, hints = child.stats(conf).hints) | ||
| Statistics(sizeInBytes = sizeInBytes, hints = child.stats.hints) | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,6 @@ | |
| package org.apache.spark.sql.catalyst.plans.logical | ||
|
|
||
| import org.apache.spark.sql.catalyst.expressions.Attribute | ||
| import org.apache.spark.sql.internal.SQLConf | ||
|
|
||
| /** | ||
| * A general hint for the child that is not yet resolved. This node is generated by the parser and | ||
|
|
@@ -44,8 +43,8 @@ case class ResolvedHint(child: LogicalPlan, hints: HintInfo = HintInfo()) | |
|
|
||
| override lazy val canonicalized: LogicalPlan = child.canonicalized | ||
|
|
||
| override def computeStats(conf: SQLConf): Statistics = { | ||
| val stats = child.stats(conf) | ||
| override def computeStats: Statistics = { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
| val stats = child.stats | ||
| stats.copy(hints = hints) | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we remove import org.apache.spark.sql.internal.SQLConf, too?