-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-22529] [SQL] Relation stats should be consistent with other plans based on cbo config #19757
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,8 +18,10 @@ | |
| package org.apache.spark.sql.hive.execution | ||
|
|
||
| import org.apache.spark.sql.QueryTest | ||
| import org.apache.spark.sql.catalyst.TableIdentifier | ||
| import org.apache.spark.sql.catalyst.parser.ParseException | ||
| import org.apache.spark.sql.hive.test.TestHiveSingleton | ||
| import org.apache.spark.sql.internal.SQLConf | ||
| import org.apache.spark.sql.test.SQLTestUtils | ||
|
|
||
| /** | ||
|
|
@@ -29,21 +31,32 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto | |
| import testImplicits._ | ||
|
|
||
| test("show cost in explain command") { | ||
| val explainCostCommand = "EXPLAIN COST SELECT * FROM src" | ||
| // For readability, we only show optimized plan and physical plan in explain cost command | ||
| checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), | ||
| checkKeywordsExist(sql(explainCostCommand), | ||
| "Optimized Logical Plan", "Physical Plan") | ||
| checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM src "), | ||
| checkKeywordsNotExist(sql(explainCostCommand), | ||
| "Parsed Logical Plan", "Analyzed Logical Plan") | ||
|
|
||
| // Only has sizeInBytes before ANALYZE command | ||
| checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), "sizeInBytes") | ||
| checkKeywordsNotExist(sql("EXPLAIN COST SELECT * FROM src "), "rowCount") | ||
| withSQLConf(SQLConf.CBO_ENABLED.key -> "true") { | ||
| // Only has sizeInBytes before ANALYZE command | ||
| checkKeywordsExist(sql(explainCostCommand), "sizeInBytes") | ||
| checkKeywordsNotExist(sql(explainCostCommand), "rowCount") | ||
|
|
||
| // Has both sizeInBytes and rowCount after ANALYZE command | ||
| sql("ANALYZE TABLE src COMPUTE STATISTICS") | ||
| checkKeywordsExist(sql("EXPLAIN COST SELECT * FROM src "), "sizeInBytes", "rowCount") | ||
| // Has both sizeInBytes and rowCount after ANALYZE command | ||
| sql("ANALYZE TABLE src COMPUTE STATISTICS") | ||
| checkKeywordsExist(sql(explainCostCommand), "sizeInBytes", "rowCount") | ||
| } | ||
|
|
||
| spark.sessionState.catalog.refreshTable(TableIdentifier("src")) | ||
|
|
||
| withSQLConf(SQLConf.CBO_ENABLED.key -> "false") { | ||
| // Don't show rowCount if cbo is disabled | ||
| checkKeywordsExist(sql(explainCostCommand), "sizeInBytes") | ||
| checkKeywordsNotExist(sql(explainCostCommand), "rowCount") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you assume there is no table relation cache in this test?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, should I refresh it for robustness?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, please. |
||
| } | ||
|
|
||
| // No cost information | ||
| // No statistics information if "cost" is not specified | ||
| checkKeywordsNotExist(sql("EXPLAIN SELECT * FROM src "), "sizeInBytes", "rowCount") | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If `rowCount` is available, why do we ignore it?