-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-39859][SQL] Support v2 DESCRIBE TABLE EXTENDED for columns #40058
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,7 +36,7 @@ import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, | |
| import org.apache.spark.sql.connector.catalog.index.SupportsIndex | ||
| import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} | ||
| import org.apache.spark.sql.connector.expressions.filter.{And => V2And, Not => V2Not, Or => V2Or, Predicate} | ||
| import org.apache.spark.sql.connector.read.LocalScan | ||
| import org.apache.spark.sql.connector.read.{LocalScan, SupportsReportStatistics} | ||
| import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} | ||
| import org.apache.spark.sql.connector.write.V1Write | ||
| import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} | ||
|
|
@@ -329,10 +329,17 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat | |
| } | ||
| DescribeTableExec(output, r.table, isExtended) :: Nil | ||
|
|
||
| case DescribeColumn(_: ResolvedTable, column, isExtended, output) => | ||
| case DescribeColumn(r: ResolvedTable, column, isExtended, output) => | ||
| column match { | ||
| case c: Attribute => | ||
| DescribeColumnExec(output, c, isExtended) :: Nil | ||
| val colStats = | ||
| r.table.asReadable.newScanBuilder(CaseInsensitiveStringMap.empty()).build() match { | ||
| case s: SupportsReportStatistics => | ||
| val stats = s.estimateStatistics() | ||
| Some(stats.columnStats().get(FieldReference.column(c.name))) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is controlled by |
||
| case _ => None | ||
| } | ||
| DescribeColumnExec(output, c, isExtended, colStats) :: Nil | ||
| case nested => | ||
| throw QueryCompilationErrors.commandNotSupportNestedColumnError( | ||
| "DESC TABLE COLUMN", toPrettySQL(nested)) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -149,13 +149,14 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase | |
| } | ||
| } | ||
|
|
||
| // TODO(SPARK-39859): Support v2 `DESCRIBE TABLE EXTENDED` for columns | ||
| test("describe extended (formatted) a column") { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't have to be done immediately, but it's better to move this test to the parent suite, to make sure v1 and v2 commands have the same behavior.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 |
||
| withNamespaceAndTable("ns", "tbl") { tbl => | ||
| sql(s""" | ||
| |CREATE TABLE $tbl | ||
| |(key INT COMMENT 'column_comment', col STRING) | ||
| |$defaultUsing""".stripMargin) | ||
|
|
||
| sql(s"INSERT INTO $tbl values (1, 'aaa'), (2, 'bbb'), (3, 'ccc'), (null, 'ddd')") | ||
| val descriptionDf = sql(s"DESCRIBE TABLE EXTENDED $tbl key") | ||
| assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === Seq( | ||
| ("info_name", StringType), | ||
|
|
@@ -165,7 +166,13 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase | |
| Seq( | ||
| Row("col_name", "key"), | ||
| Row("data_type", "int"), | ||
| Row("comment", "column_comment"))) | ||
| Row("comment", "column_comment"), | ||
| Row("min", "NULL"), | ||
| Row("max", "NULL"), | ||
| Row("num_nulls", "1"), | ||
| Row("distinct_count", "4"), | ||
| Row("avg_col_len", "NULL"), | ||
| Row("max_col_len", "NULL"))) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this may not be very cheap. Shall we only do it if
isExtendedis true?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
and what if the table is not readable (e.g. write-only)? We should not fail but show no column stats.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW, I think it's cleaner to pass v2
TabletoDescribeColumnExecand move this code block toDescribeColumnExecThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cloud-fan Thanks for your comments. I will have a follow-up to fix this and also move the test to parent suite.